1 /*
2 * cmdargs.c
3 *
4 * $Id$
5 *
6 *****************************************************************************
7 *
8 * Copyright (c) 2004, Luke Sheneman
9 * All rights reserved.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 *
15 * + Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * + Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in
19 * the documentation and/or other materials provided with the
20 * distribution.
21 * + The names of its contributors may not be used to endorse or promote
22 * products derived from this software without specific prior
23 * written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 *
37 *****************************************************************************
38 *
39 * AUTHOR:
40 *
41 * Luke Sheneman
42 * sheneman@cs.uidaho.edu
43 *
44 */
45
46 #include <stdio.h>
47 #include <stdlib.h>
48 #include <string.h>
49 #include <time.h>
50
51
52 //#ifdef USE_GNU
53 //#include <getopt.h>
54 //#else
55 #include "getopt_long.h"
56 //#endif /* USE_GNU*/
57
58
59 #include "clearcut.h"
60 #include "cmdargs.h"
61
62
63 /*
64 * NJ_handle_args() -
65 *
66 */
67 NJ_ARGS *
NJ_handle_args(int argc,char * argv[])68 NJ_handle_args(int argc,
69 char *argv[]) {
70
71 static NJ_ARGS nj_args;
72 int option_index, c;
73
74 optind = 0; //neccasary to read in arguments if code is run more than once
75
76 struct option NJ_long_options[] = {
77
78 /* These options don't set a flag */
79 {"in", required_argument, NULL, 'i'},
80 {"out", required_argument, NULL, 'o'},
81 {"seed", required_argument, NULL, 's'},
82 {"matrixout", required_argument, NULL, 'm'},
83 {"ntrees", required_argument, NULL, 'n'},
84
85 /* These options set a flag */
86 {"verbose", no_argument, &(nj_args.verbose_flag), 1},
87 {"quiet", no_argument, &(nj_args.quiet_flag), 1},
88 {"distance", no_argument, &(nj_args.input_mode), NJ_INPUT_MODE_DISTANCE},
89 {"alignment", no_argument, &(nj_args.input_mode), NJ_INPUT_MODE_ALIGNED_SEQUENCES},
90 {"help", no_argument, &(nj_args.help), 1},
91 {"version", no_argument, &(nj_args.version), 1},
92 {"norandom", no_argument, &(nj_args.norandom), 1},
93 {"shuffle", no_argument, &(nj_args.shuffle), 1},
94 {"stdin", no_argument, &(nj_args.stdin_flag), 1},
95 {"stdout", no_argument, &(nj_args.stdout_flag), 1},
96 {"dna", no_argument, &(nj_args.dna_flag), 1},
97 {"DNA", no_argument, &(nj_args.dna_flag), 1},
98 {"protein", no_argument, &(nj_args.protein_flag), 1},
99 {"neighbor", no_argument, &(nj_args.neighbor), 1},
100 {"expblen", no_argument, &(nj_args.expblen), 1},
101 {"expdist", no_argument, &(nj_args.expdist), 1},
102
103 {"jukes", no_argument, &(nj_args.jukes_flag), 1},
104 {"kimura", no_argument, &(nj_args.kimura_flag), 1},
105
106 {0, 0, 0, 0}
107
108 };
109
110 /* initializes options to their default */
111 nj_args.infilename = NULL;
112 nj_args.outfilename = NULL;
113 nj_args.matrixout = NULL;
114 nj_args.seed = time(0);
115 nj_args.verbose_flag = 0;
116 nj_args.quiet_flag = 0;
117 nj_args.input_mode = NJ_INPUT_MODE_DISTANCE;
118 nj_args.help = 0;
119 nj_args.version = 0;
120 nj_args.norandom = 0;
121 nj_args.shuffle = 0;
122 nj_args.stdin_flag = 0;
123 nj_args.stdout_flag = 0;
124 nj_args.dna_flag = 0;
125 nj_args.protein_flag = 0;
126 nj_args.correction_model = NJ_MODEL_NONE;
127 nj_args.jukes_flag = 0;
128 nj_args.kimura_flag = 0;
129 nj_args.neighbor = 0;
130 nj_args.ntrees = 1;
131 nj_args.expblen = 0;
132 nj_args.expdist = 0;
133
134 while(1) {
135
136 c = getopt_long(argc,
137 argv,
138 "i:o:s:m:n:vqduahVSIOrDPjkNeE",
139 NJ_long_options,
140 &option_index);
141 if(c == -1) {
142 break;
143 }
144 //printf("%d\t%d\n", option_index, argc);
145 //for (int red = 0; red < argc; red++) { printf("%s\n", argv[red]); }
146 switch(c) {
147
148 case 0:
149 if(NJ_long_options[option_index].flag) {
150 break;
151 }
152
153 printf("option %s", NJ_long_options[option_index].name);
154 if(optarg) {
155 printf(" with arg %s", optarg);
156 }
157 printf("\n");
158 break;
159
160 case 'i':
161 nj_args.infilename = optarg;
162 break;
163
164 case 'o':
165 nj_args.outfilename = optarg;
166 break;
167
168 case 's':
169 nj_args.seed = atoi(optarg);
170 break;
171
172 case 'm':
173 nj_args.matrixout = optarg;
174 break;
175
176 case 'n':
177 nj_args.ntrees = atoi(optarg);
178 break;
179
180 case 'v':
181 nj_args.verbose_flag = 1;
182 break;
183
184 case 'q':
185 nj_args.quiet_flag = 1;
186 break;
187
188 case 'd':
189 nj_args.input_mode = NJ_INPUT_MODE_DISTANCE;
190 break;
191
192 case 'a':
193 nj_args.input_mode = NJ_INPUT_MODE_ALIGNED_SEQUENCES;
194 break;
195
196 case 'h':
197 nj_args.help = 1;
198 break;
199
200 case 'V':
201 nj_args.version = 1;
202 break;
203
204 case 'S':
205 nj_args.shuffle = 1;
206 break;
207
208 case 'I':
209 nj_args.stdin_flag = 1;
210 break;
211
212 case 'O':
213 nj_args.stdin_flag = 1;
214 break;
215
216 case 'r':
217 nj_args.norandom = 1;
218 break;
219
220 case 'D':
221 nj_args.dna_flag = 1;
222 break;
223
224 case 'P':
225 nj_args.protein_flag = 1;
226 break;
227
228 case 'j':
229 nj_args.jukes_flag = 1;
230 break;
231
232 case 'k':
233 nj_args.kimura_flag = 1;
234 break;
235
236 case 'N':
237 nj_args.neighbor = 1;
238 break;
239
240 case 'e':
241 nj_args.expblen = 1;
242 break;
243
244 case 'E':
245 nj_args.expdist = 1;
246 break;
247
248 default:
249 NJ_usage();
250 exit(-1);
251 }
252 }
253
254 if(optind < argc) {
255 fprintf(stderr, "Clearcut: Unknown command-line argument:\n --> %s\n", argv[optind]);
256 NJ_usage();
257 exit(-1);
258 }
259
260 if(nj_args.version) {
261 printf("Clearcut Version: %s\n", NJ_VERSION);
262 //exit(0);
263 }
264
265 if(nj_args.help) {
266 NJ_usage();
267 //exit(0);
268 }
269
270 /* if stdin & explicit filename are specified for input */
271 if(nj_args.stdin_flag) {
272 if(nj_args.infilename) {
273 fprintf(stderr, "Clearcut: Ambiguous input source specified. Specify input filename OR stdin.\n");
274 NJ_usage();
275 exit(-1);
276 }
277 }
278
279 /* if stdout & explicit filename are specified for output */
280 if(nj_args.stdout_flag) {
281 if(nj_args.outfilename) {
282 fprintf(stderr, "Clearcut: Ambiguous output specified. Specify output filename OR stdout.\n");
283 NJ_usage();
284 exit(-1);
285 }
286 }
287
288 /* if user did not specify stdin or filename, default to stdin */
289 if(!nj_args.stdin_flag) {
290 if(!nj_args.infilename) {
291
292 fprintf(stderr, "Clearcut: No input file specified. Using stdin.\n");
293 nj_args.stdin_flag = 1;
294 }
295 }
296
297 /* if user did not specify stdout or filename, default to stdout */
298 if(!nj_args.stdout_flag) {
299 if(!nj_args.outfilename) {
300
301 fprintf(stderr, "Clearcut: No output file specified. Using stdout.\n");
302 nj_args.stdout_flag = 1;
303 }
304 }
305
306 /* User must specify distance matrix or alignment */
307 if(nj_args.input_mode == NJ_INPUT_MODE_UNKNOWN) {
308 fprintf(stderr, "Clearcut: Must specify input type (--distance | --alignment)\n");
309 NJ_usage();
310 exit(-1);
311 }
312
313 /* do not allow protein or DNA options for distance matrix input */
314 if(nj_args.input_mode == NJ_INPUT_MODE_DISTANCE) {
315 if(nj_args.dna_flag || nj_args.protein_flag) {
316 fprintf(stderr, "Clearcut: Ambiguous arguments. (--protein | --DNA) do not apply to distance \n");
317 NJ_usage();
318 exit(-1);
319 }
320 }
321
322 /* make sure different filenames were specified for input and output */
323 if(!nj_args.stdin_flag && !nj_args.stdout_flag) {
324
325 if(!strcmp(nj_args.infilename, nj_args.outfilename)) {
326 fprintf(stderr, "Clearcut: Input filename and output filename must be unique.\n");
327 NJ_usage();
328 exit(-1);
329 }
330 }
331
332 /* make sure that user specifies DNA or Protein if dealing with alignment input */
333 if(nj_args.input_mode == NJ_INPUT_MODE_ALIGNED_SEQUENCES) {
334 if(!nj_args.dna_flag && !nj_args.protein_flag) {
335 fprintf(stderr, "Clearcut: Must specify protein or DNA for alignment input.\n");
336 NJ_usage();
337 exit(-1);
338 }
339 }
340
341 /* make sure that user does not specify both protein and DNA when dealing with alignment input */
342 if(nj_args.input_mode == NJ_INPUT_MODE_ALIGNED_SEQUENCES) {
343 if(nj_args.dna_flag && nj_args.protein_flag) {
344 fprintf(stderr, "Clearcut: Specifying protein and DNA sequences are mutually exclusive options\n");
345 NJ_usage();
346 exit(-1);
347 }
348 }
349
350 /* make sure verbose and quiet were not specified together */
351 if(nj_args.verbose_flag && nj_args.quiet_flag) {
352 fprintf(stderr, "Clearcut: Verbose and Quiet mode are mutually exclusive.\n");
353 NJ_usage();
354 exit(-1);
355 }
356
357 /* make sure that a correction model was specified only when providing an alignment */
358 if(nj_args.input_mode == NJ_INPUT_MODE_DISTANCE) {
359 if(nj_args.jukes_flag || nj_args.kimura_flag) {
360 fprintf(stderr, "Clearcut: Only specify correction model for alignment input.\n");
361 NJ_usage();
362 exit(-1);
363 }
364 } else {
365 if(nj_args.jukes_flag && nj_args.kimura_flag) {
366 fprintf(stderr, "Clearcut: Only specify one correction model\n");
367 NJ_usage();
368 exit(-1);
369 } else {
370 if(nj_args.jukes_flag && !nj_args.kimura_flag) {
371 nj_args.correction_model = NJ_MODEL_JUKES;
372 } else if(nj_args.kimura_flag && !nj_args.jukes_flag) {
373 nj_args.correction_model = NJ_MODEL_KIMURA;
374 } else {
375 nj_args.correction_model = NJ_MODEL_NONE; /* DEFAULT */
376 }
377 }
378 }
379
380 /* make sure that the number of output trees is reasonable */
381 if(nj_args.ntrees <= 0) {
382 fprintf(stderr, "Clearcut: Number of output trees must be a positive integer.\n");
383 NJ_usage();
384 exit(-1);
385 }
386
387 /*
388 * make sure that if exponential distances are specified,
389 * we are dealing with alignment input
390 */
391 if(nj_args.expdist && nj_args.input_mode != NJ_INPUT_MODE_ALIGNED_SEQUENCES) {
392 fprintf(stderr, "Clearcut: Exponential notation for distance matrix output requires that input be an alignment\n");
393 NJ_usage();
394 exit(-1);
395 }
396
397 return(&nj_args);
398 }
399
400
401
402
403
404 /*
405 * NJ_print_args() -
406 *
407 */
408 void
NJ_print_args(NJ_ARGS * nj_args)409 NJ_print_args(NJ_ARGS *nj_args) {
410
411 char input_mode[32];
412
413 switch (nj_args->input_mode) {
414 case NJ_INPUT_MODE_DISTANCE:
415 sprintf(input_mode, "Distance Matrix");
416 break;
417 case NJ_INPUT_MODE_UNALIGNED_SEQUENCES:
418 sprintf(input_mode, "Unaligned Sequences");
419 break;
420 case NJ_INPUT_MODE_ALIGNED_SEQUENCES:
421 sprintf(input_mode, "Aligned Sequences");
422 break;
423 default:
424 sprintf(input_mode, "UNKNOWN");
425 break;
426 }
427
428 printf("\n*** Command Line Arguments ***\n");
429
430 printf("Input Mode: %s\n", input_mode);
431
432 if(nj_args->stdin_flag) {
433 printf("Input from STDIN\n");
434 } else {
435 printf("Input File: %s\n", nj_args->infilename);
436 }
437
438 if(nj_args->stdout_flag) {
439 printf("Output from STDOUT\n");
440 } else {
441 printf("Output File: %s\n", nj_args->outfilename);
442 }
443
444 if(nj_args->input_mode != NJ_INPUT_MODE_DISTANCE) {
445 if(nj_args->aligned_flag) {
446 printf("Input Sequences Aligned: YES\n");
447 } else {
448 printf("Input Sequences Aligned: NO\n");
449 }
450 }
451
452 if(nj_args->verbose_flag) {
453 printf("Verbose Mode: ON\n");
454 } else {
455 printf("Verbose Mode: OFF\n");
456 }
457
458 if(nj_args->quiet_flag) {
459 printf("Quiet Mode: ON\n");
460 } else {
461 printf("Quiet Mode: OFF\n");
462 }
463
464 if(nj_args->seed) {
465 printf("Random Seed: %d\n", nj_args->seed);
466 }
467
468 printf("\n*******\n");
469
470 return;
471 }
472
473
474
475
/*
 * NJ_usage() -
 *
 * Write the program's usage text to stdout: the invocation synopsis,
 * the general, input, correction-model and output options, and two
 * example command lines.
 *
 */
void
NJ_usage(void) {

  /* the help text, one output line per entry, emitted in order */
  static const char *const usage_text[] = {
    "Usage: clearcut --in=<infilename> --out=<outfilename> [options]...\n",
    "GENERAL OPTIONS:\n",
    " -h, --help Display this information.\n",
    " -V, --version Print the version of this program.\n",
    " -v, --verbose More output. (Default: OFF)\n",
    " -q, --quiet Silent operation. (Default: ON)\n",
    " -s, --seed=<seed> Explicitly set the PRNG seed to a specific value.\n",
    " -r, --norandom Attempt joins deterministically. (Default: OFF)\n",
    " -S, --shuffle Randomly shuffle the distance matrix. (Default: OFF)\n",
    " -N, --neighbor Use traditional Neighbor-Joining algorithm. (Default: OFF)\n",

    "\n",
    "INPUT OPTIONS:\n",
    " -I, --stdin Read input from STDIN.\n",
    " -d, --distance Input file is a distance matrix. (Default: ON)\n",
    " -a, --alignment Input file is a set of aligned sequences. (Default: OFF)\n",
    " -D, --DNA Input alignment are DNA sequences.\n",
    " -P, --protein Input alignment are protein sequences.\n",

    "\n",
    "CORRECTION MODEL FOR COMPUTING DISTANCE MATRIX (Default: NO Correction):\n",
    " -j, --jukes Use Jukes-Cantor correction for computing distance matrix.\n",
    " -k, --kimura Use Kimura correction for distance matrix.\n",

    "\n",
    "OUTPUT OPTIONS:\n",
    " -O, --stdout Output tree to STDOUT.\n",
    " -m, --matrixout=<file> Output distance matrix to specified file.\n",
    " -n, --ntrees=<n> Output n trees. (Default: 1)\n",
    " -e, --expblen Exponential notation for branch lengths. (Default: OFF)\n",
    " -E, --expdist Exponential notation in distance output. (Default: OFF)\n",

    "\n",
    "EXAMPLES:\n",
    " Compute tree by supplying distance matrix via stdin:\n",
    " clearcut --distance < distances.txt > treefile.tre\n",
    "\n",
    " Compute tree by supplying an alignment of DNA sequences from a file:\n",
    " clearcut --alignment --DNA --in=alignment.txt --out=treefile.tre\n"
  };

  size_t line;

  for(line = 0; line < sizeof(usage_text) / sizeof(usage_text[0]); line++) {
    fputs(usage_text[line], stdout);
  }
}
527
528
529
530