1 /*
2
3 VSEARCH: a versatile open source tool for metagenomics
4
5 Copyright (C) 2014-2021, Torbjorn Rognes, Frederic Mahe and Tomas Flouri
6 All rights reserved.
7
8 Contact: Torbjorn Rognes <torognes@ifi.uio.no>,
9 Department of Informatics, University of Oslo,
10 PO Box 1080 Blindern, NO-0316 Oslo, Norway
11
12 This software is dual-licensed and available under a choice
13 of one of two licenses, either under the terms of the GNU
14 General Public License version 3 or the BSD 2-Clause License.
15
16
17 GNU General Public License version 3
18
19 This program is free software: you can redistribute it and/or modify
20 it under the terms of the GNU General Public License as published by
21 the Free Software Foundation, either version 3 of the License, or
22 (at your option) any later version.
23
24 This program is distributed in the hope that it will be useful,
25 but WITHOUT ANY WARRANTY; without even the implied warranty of
26 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27 GNU General Public License for more details.
28
29 You should have received a copy of the GNU General Public License
30 along with this program. If not, see <http://www.gnu.org/licenses/>.
31
32
33 The BSD 2-Clause License
34
35 Redistribution and use in source and binary forms, with or without
36 modification, are permitted provided that the following conditions
37 are met:
38
39 1. Redistributions of source code must retain the above copyright
40 notice, this list of conditions and the following disclaimer.
41
42 2. Redistributions in binary form must reproduce the above copyright
43 notice, this list of conditions and the following disclaimer in the
44 documentation and/or other materials provided with the distribution.
45
46 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
47 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
48 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
49 FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
50 COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
51 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
52 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
53 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
54 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
55 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
56 ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
57 POSSIBILITY OF SUCH DAMAGE.
58
59 */
60
61 #include "vsearch.h"
62
/* options */

/*
  Global variables holding the values of the command line options.
  Most of them are given their default value at the start of args_init().
  They are grouped by type below and sorted roughly alphabetically
  within each group.
*/

/* Boolean options */

bool opt_bzip2_decompress;
bool opt_clusterout_id;
bool opt_clusterout_sort;
bool opt_eeout;
bool opt_fasta_score;
bool opt_fastq_allowmergestagger;
bool opt_fastq_eeout;
bool opt_fastq_nostagger;
bool opt_gzip_decompress;
bool opt_label_substr_match;
bool opt_no_progress;
bool opt_quiet;
bool opt_relabel_keep;
bool opt_relabel_md5;
bool opt_relabel_self;
bool opt_relabel_sha1;
bool opt_samheader;
bool opt_sff_clip;
bool opt_sizeorder;
bool opt_xee;
bool opt_xsize;

/* String options */

char * opt_allpairs_global;
char * opt_alnout;
char * opt_biomout;
char * opt_blast6out;
char * opt_borderline;
char * opt_centroids;
char * opt_chimeras;
char * opt_cluster_fast;
char * opt_cluster_size;
char * opt_cluster_smallmem;
char * opt_cluster_unoise;
char * opt_clusters;
char * opt_consout;
char * opt_cut;
char * opt_cut_pattern;
char * opt_db;
char * opt_dbmatched;
char * opt_dbnotmatched;
char * opt_derep_fulllength;
char * opt_derep_id;
char * opt_derep_prefix;
char * opt_eetabbedout;
char * opt_fastaout;
char * opt_fastaout_discarded;
char * opt_fastaout_discarded_rev;
char * opt_fastaout_notmerged_fwd;
char * opt_fastaout_notmerged_rev;
char * opt_fastaout_rev;
char * opt_fastapairs;
char * opt_fastq_chars;
char * opt_fastq_convert;
char * opt_fastq_eestats;
char * opt_fastq_eestats2;
char * opt_fastq_filter;
char * opt_fastq_join;
char * opt_fastq_mergepairs;
char * opt_fastq_stats;
char * opt_fastqout;
char * opt_fastqout_discarded;
char * opt_fastqout_discarded_rev;
char * opt_fastqout_notmerged_fwd;
char * opt_fastqout_notmerged_rev;
char * opt_fastqout_rev;
char * opt_fastx_filter;
char * opt_fastx_getseq;
char * opt_fastx_getseqs;
char * opt_fastx_getsubseq;
char * opt_fastx_mask;
char * opt_fastx_revcomp;
char * opt_fastx_subsample;
char * opt_join_padgap;
char * opt_join_padgapq;
char * opt_label;
char * opt_labels;
char * opt_label_suffix;
char * opt_label_word;
char * opt_label_words;
char * opt_label_field;
char * opt_log;
char * opt_makeudb_usearch;
char * opt_maskfasta;
char * opt_matched;
char * opt_mothur_shared_out;
char * opt_msaout;
char * opt_nonchimeras;
char * opt_notmatched;
char * opt_notmatchedfq;
char * opt_orient;
char * opt_otutabout;
char * opt_output;
char * opt_pattern;
char * opt_profile;
char * opt_relabel;
char * opt_rereplicate;
char * opt_reverse;
char * opt_samout;
char * opt_search_exact;
char * opt_sff_convert;
char * opt_shuffle;
char * opt_sintax;
char * opt_sortbylength;
char * opt_sortbysize;
char * opt_tabbedout;
char * opt_udb2fasta;
char * opt_udbinfo;
char * opt_udbstats;
char * opt_uc;
char * opt_uchime_denovo;
char * opt_uchime2_denovo;
char * opt_uchime3_denovo;
char * opt_uchime_ref;
char * opt_uchimealns;
char * opt_uchimeout;
char * opt_usearch_global;
char * opt_userout;

/* Floating point options */

/* heap-allocated array with opt_ee_cutoffs_count elements; */
/* replaced by args_get_ee_cutoffs() */
double * opt_ee_cutoffs_values;
double opt_abskew;
double opt_dn;
double opt_fastq_maxdiffpct;
double opt_fastq_maxee;
double opt_fastq_maxee_rate;
double opt_fastq_truncee;
double opt_id;
double opt_max_unmasked_pct;
double opt_maxid;
double opt_maxqt;
double opt_maxsizeratio;
double opt_maxsl;
double opt_mid;
double opt_min_unmasked_pct;
double opt_mindiv;
double opt_minh;
double opt_minqt;
double opt_minsizeratio;
double opt_minsl;
double opt_query_cov;
double opt_sample_pct;
double opt_sintax_cutoff;
double opt_target_cov;
double opt_unoise_alpha;
double opt_weak_id;
double opt_xn;

/* Integer options (int) */

int opt_acceptall;
int opt_alignwidth;
int opt_cons_truncate;
/* number of elements in opt_ee_cutoffs_values */
int opt_ee_cutoffs_count;
/* the twelve gap penalties below are set by args_get_gap_penalty_string() */
int opt_gap_extension_query_interior;
int opt_gap_extension_query_left;
int opt_gap_extension_query_right;
int opt_gap_extension_target_interior;
int opt_gap_extension_target_left;
int opt_gap_extension_target_right;
int opt_gap_open_query_interior;
int opt_gap_open_query_left;
int opt_gap_open_query_right;
int opt_gap_open_target_interior;
int opt_gap_open_target_left;
int opt_gap_open_target_right;
int opt_help;
/* the three length cutoffs below are set by args_get_length_cutoffs() */
int opt_length_cutoffs_shortest;
int opt_length_cutoffs_longest;
int opt_length_cutoffs_increment;
int opt_mindiffs;
int opt_slots;
int opt_uchimeout5;
int opt_usersort;
int opt_version;

/* Integer options (64-bit) */

int64_t opt_dbmask;
int64_t opt_fasta_width;
int64_t opt_fastq_ascii;
int64_t opt_fastq_asciiout;
int64_t opt_fastq_maxdiffs;
int64_t opt_fastq_maxlen;
int64_t opt_fastq_maxmergelen;
int64_t opt_fastq_maxns;
int64_t opt_fastq_minlen;
int64_t opt_fastq_minmergelen;
int64_t opt_fastq_minovlen;
int64_t opt_fastq_qmax;
int64_t opt_fastq_qmaxout;
int64_t opt_fastq_qmin;
int64_t opt_fastq_qminout;
int64_t opt_fastq_stripleft;
int64_t opt_fastq_stripright;
int64_t opt_fastq_tail;
int64_t opt_fastq_trunclen;
int64_t opt_fastq_trunclen_keep;
int64_t opt_fastq_truncqual;
int64_t opt_fulldp;
int64_t opt_hardmask;
int64_t opt_iddef;
int64_t opt_idprefix;
int64_t opt_idsuffix;
int64_t opt_leftjust;
int64_t opt_match;
int64_t opt_maxaccepts;
int64_t opt_maxdiffs;
int64_t opt_maxgaps;
int64_t opt_maxhits;
int64_t opt_maxqsize;
int64_t opt_maxrejects;
int64_t opt_maxseqlength;
int64_t opt_maxsize;
int64_t opt_maxsubs;
int64_t opt_maxuniquesize;
int64_t opt_mincols;
int64_t opt_minseqlength;
int64_t opt_minsize;
int64_t opt_mintsize;
int64_t opt_minuniquesize;
int64_t opt_minwordmatches;
int64_t opt_mismatch;
int64_t opt_notrunclabels;
int64_t opt_output_no_hits;
int64_t opt_qmask;
int64_t opt_randseed;
int64_t opt_rightjust;
int64_t opt_rowlen;
int64_t opt_sample_size;
int64_t opt_self;
int64_t opt_selfid;
int64_t opt_sizein;
int64_t opt_sizeout;
int64_t opt_strand;
int64_t opt_subseq_start;
int64_t opt_subseq_end;
int64_t opt_threads;
int64_t opt_top_hits_only;
int64_t opt_topn;
int64_t opt_uc_allhits;
int64_t opt_wordlength;
297
/* Other variables */

/* cpu features available */

/*
  One flag per CPU feature, non-zero when the feature is available.
  All start out as 0; cpu_features_detect() sets the ones relevant to
  the architecture being compiled for and cpu_features_show() reports
  the non-zero ones.
*/

int64_t altivec_present = 0;
int64_t neon_present = 0;
int64_t mmx_present = 0;
int64_t sse_present = 0;
int64_t sse2_present = 0;
int64_t sse3_present = 0;
int64_t ssse3_present = 0;
int64_t sse41_present = 0;
int64_t sse42_present = 0;
int64_t popcnt_present = 0;
int64_t avx_present = 0;
int64_t avx2_present = 0;

/* File-local program state. progname is set to argv[0] by args_init(). */
/* NOTE(review): the other four are presumably filled in elsewhere in */
/* this file - not visible from this section. */

static char * progname;
static char progheader[80];
static char * cmdline;
static time_t time_start;
static time_t time_finish;

/* Log file stream; a null pointer until assigned elsewhere */
FILE * fp_log = nullptr;

/* Device paths naming standard input and standard output */
char * STDIN_NAME = (char*) "/dev/stdin";
char * STDOUT_NAME = (char*) "/dev/stdout";
325
#ifdef __x86_64__
/*
  Execute the x86 CPUID instruction (only defined on x86_64).

  f1 is loaded into EAX and f2 into ECX before the instruction is run.
  Afterwards a, b, c and d receive the contents of EAX, EBX, ECX and EDX,
  so they must be assignable lvalues.

  Note that the expansion already ends with a semicolon.
*/
#define cpuid(f1, f2, a, b, c, d)                                \
  __asm__ __volatile__ ("cpuid"                                  \
                        : "=a" (a), "=b" (b), "=c" (c), "=d" (d) \
                        : "a" (f1), "c" (f2));
#endif
332
cpu_features_detect()333 void cpu_features_detect()
334 {
335 #ifdef __aarch64__
336 #ifdef __ARM_NEON
337 /* may check /proc/cpuinfo for asimd or neon */
338 neon_present = 1;
339 #else
340 #error ARM Neon not present
341 #endif
342 #elif __PPC__
343 altivec_present = 1;
344 #elif __x86_64__
345 unsigned int a, b, c, d;
346
347 cpuid(0, 0, a, b, c, d);
348 unsigned int maxlevel = a & 0xff;
349
350 if (maxlevel >= 1)
351 {
352 cpuid(1, 0, a, b, c, d);
353 mmx_present = (d >> 23) & 1;
354 sse_present = (d >> 25) & 1;
355 sse2_present = (d >> 26) & 1;
356 sse3_present = (c >> 0) & 1;
357 ssse3_present = (c >> 9) & 1;
358 sse41_present = (c >> 19) & 1;
359 sse42_present = (c >> 20) & 1;
360 popcnt_present = (c >> 23) & 1;
361 avx_present = (c >> 28) & 1;
362
363 if (maxlevel >= 7)
364 {
365 cpuid(7, 0, a, b, c, d);
366 avx2_present = (b >> 5) & 1;
367 }
368 }
369 #else
370 #error Unknown architecture
371 #endif
372 }
373
cpu_features_show()374 void cpu_features_show()
375 {
376 fprintf(stderr, "CPU features:");
377 if (neon_present) {
378 fprintf(stderr, " neon");
379
380 }
381 if (altivec_present) {
382 fprintf(stderr, " altivec");
383
384 }
385 if (mmx_present) {
386 fprintf(stderr, " mmx");
387
388 }
389 if (sse_present) {
390 fprintf(stderr, " sse");
391
392 }
393 if (sse2_present) {
394 fprintf(stderr, " sse2");
395
396 }
397 if (sse3_present) {
398 fprintf(stderr, " sse3");
399
400 }
401 if (ssse3_present) {
402 fprintf(stderr, " ssse3");
403
404 }
405 if (sse41_present) {
406 fprintf(stderr, " sse4.1");
407
408 }
409 if (sse42_present) {
410 fprintf(stderr, " sse4.2");
411
412 }
413 if (popcnt_present) {
414 fprintf(stderr, " popcnt");
415
416 }
417 if (avx_present) {
418 fprintf(stderr, " avx");
419
420 }
421 if (avx2_present) {
422 fprintf(stderr, " avx2");
423
424 }
425 fprintf(stderr, "\n");
426 }
427
args_get_ee_cutoffs(char * arg)428 void args_get_ee_cutoffs(char * arg)
429 {
430 /* get comma-separated list of floating point numbers */
431 /* save in ee_cutoffs_count and ee_cutoffs_values */
432
433 int commas = 0;
434 for (size_t i=0; i<strlen(arg); i++) {
435 if (arg[i] == ',') {
436 commas++;
437
438 }
439
440 }
441
442 opt_ee_cutoffs_count = 0;
443 opt_ee_cutoffs_values = (double*) xrealloc(opt_ee_cutoffs_values, (commas+1) * sizeof(double));
444
445 char * s = arg;
446 while(true)
447 {
448 double val = 0;
449 int skip = 0;
450
451 if ((sscanf(s, "%lf%n", &val, &skip) != 1) || (val <= 0.0)) {
452 fatal("Invalid arguments to ee_cutoffs");
453
454 }
455
456 opt_ee_cutoffs_values[opt_ee_cutoffs_count++] = val;
457
458 s += skip;
459
460 if (*s == ',') {
461 s++;
462 } else if (*s == 0) {
463 break;
464 } else {
465 fatal("Invalid arguments to ee_cutoffs");
466
467 }
468 }
469 }
470
args_get_length_cutoffs(char * arg)471 void args_get_length_cutoffs(char * arg)
472 {
473 /* get comma-separated list of 3 integers: */
474 /* smallest, largest and increment. */
475 /* second value may be * indicating no limit */
476 /* save in length_cutoffs_{smallest,largest,increment} */
477
478 int skip = 0;
479 if (sscanf(arg, "%d,%d,%d%n", &opt_length_cutoffs_shortest, &opt_length_cutoffs_longest, &opt_length_cutoffs_increment, & skip) == 3)
480 {
481 if ((size_t)skip < strlen(arg)) {
482 fatal("Invalid arguments to length_cutoffs");
483
484 }
485 }
486 else if (sscanf(arg, "%d,*,%d%n", &opt_length_cutoffs_shortest, &opt_length_cutoffs_increment, &skip) == 2)
487 {
488 if ((size_t)skip < strlen(arg)) {
489 fatal("Invalid arguments to length_cutoffs");
490
491 }
492 opt_length_cutoffs_longest = INT_MAX;
493 }
494 else {
495 fatal("Invalid arguments to length_cutoffs");
496
497 }
498
499 if ((opt_length_cutoffs_shortest < 1) ||
500 (opt_length_cutoffs_shortest > opt_length_cutoffs_longest) ||
501 (opt_length_cutoffs_increment < 1)) {
502 fatal("Invalid arguments to length_cutoffs");
503
504 }
505
506 }
507
508
509
args_get_gap_penalty_string(char * arg,int is_open)510 void args_get_gap_penalty_string(char * arg, int is_open)
511 {
512 /* See http://www.drive5.com/usearch/manual/aln_params.html
513
514 --gapopen *E/10I/1E/2L/3RQ/4RT/1IQ
515 --gapext *E/10I/1E/2L/3RQ/4RT/1IQ
516
517 integer or *
518 followed by I, E, L, R, Q or T characters
519 separated by /
520 * means infinitely high (disallow)
521 E=end
522 I=interior
523 L=left
524 R=right
525 Q=query
526 T=target
527
528 E cannot be combined with L or R
529
530 We do not support floating point values. Therefore,
531 all default score and penalties are multiplied by 2.
532
533 */
534
535 char *p = arg;
536
537 while (*p)
538 {
539 int skip = 0;
540 int pen = 0;
541
542 if (sscanf(p, "%d%n", &pen, &skip) == 1)
543 {
544 p += skip;
545 }
546 else if (*p == '*')
547 {
548 pen = 1000;
549 p++;
550 }
551 else {
552 fatal("Invalid gap penalty argument (%s)", p);
553
554 }
555
556 char * q = p;
557
558 int set_E = 0;
559 int set_I = 0;
560 int set_L = 0;
561 int set_R = 0;
562 int set_Q = 0;
563 int set_T = 0;
564
565 while((*p) && (*p != '/'))
566 {
567 switch(*p)
568 {
569 case 'E':
570 set_E = 1;
571 break;
572 case 'I':
573 set_I = 1;
574 break;
575 case 'L':
576 set_L = 1;
577 break;
578 case 'R':
579 set_R = 1;
580 break;
581 case 'Q':
582 set_Q = 1;
583 break;
584 case 'T':
585 set_T = 1;
586 break;
587 default:
588 fatal("Invalid char '%.1s' in gap penalty string", p);
589 break;
590 }
591 p++;
592 }
593
594 if (*p == '/') {
595 p++;
596
597 }
598
599 if (set_E && (set_L || set_R)) {
600 fatal("Invalid gap penalty string (E and L or R) '%s'", q);
601
602 }
603
604 if (set_E)
605 {
606 set_L = 1;
607 set_R = 1;
608 }
609
610 /* if neither L, I, R nor E is specified, it applies to all */
611
612 if ((!set_L) && (!set_I) && (!set_R))
613 {
614 set_L = 1;
615 set_I = 1;
616 set_R = 1;
617 }
618
619 /* if neither Q nor T is specified, it applies to both */
620
621 if ((!set_Q) && (!set_T))
622 {
623 set_Q = 1;
624 set_T = 1;
625 }
626
627 if (is_open)
628 {
629 if (set_Q)
630 {
631 if (set_L) {
632 opt_gap_open_query_left = pen;
633
634 }
635 if (set_I) {
636 opt_gap_open_query_interior = pen;
637
638 }
639 if (set_R) {
640 opt_gap_open_query_right = pen;
641
642 }
643 }
644 if (set_T)
645 {
646 if (set_L) {
647 opt_gap_open_target_left = pen;
648
649 }
650 if (set_I) {
651 opt_gap_open_target_interior = pen;
652
653 }
654 if (set_R) {
655 opt_gap_open_target_right = pen;
656
657 }
658 }
659 }
660 else
661 {
662 if (set_Q)
663 {
664 if (set_L) {
665 opt_gap_extension_query_left = pen;
666
667 }
668 if (set_I) {
669 opt_gap_extension_query_interior = pen;
670
671 }
672 if (set_R) {
673 opt_gap_extension_query_right = pen;
674
675 }
676 }
677 if (set_T)
678 {
679 if (set_L) {
680 opt_gap_extension_target_left = pen;
681
682 }
683 if (set_I) {
684 opt_gap_extension_target_interior = pen;
685
686 }
687 if (set_R) {
688 opt_gap_extension_target_right = pen;
689
690 }
691 }
692 }
693 }
694 }
695
696
args_getlong(char * arg)697 int64_t args_getlong(char * arg)
698 {
699 int len = 0;
700 int64_t temp = 0;
701 int ret = sscanf(arg, "%" PRId64 "%n", &temp, &len);
702 if ((ret == 0) || (((unsigned int)(len)) < strlen(arg))) {
703 fatal("Illegal option argument");
704
705 }
706 return temp;
707 }
708
args_getdouble(char * arg)709 double args_getdouble(char * arg)
710 {
711 int len = 0;
712 double temp = 0;
713 int ret = sscanf(arg, "%lf%n", &temp, &len);
714 if ((ret == 0) || (((unsigned int)(len)) < strlen(arg))) {
715 fatal("Illegal option argument");
716
717 }
718 return temp;
719 }
720
args_init(int argc,char ** argv)721 void args_init(int argc, char **argv)
722 {
723 /* Set defaults */
724
725 progname = argv[0];
726
727 opt_abskew = -1.0;
728 opt_acceptall = 0;
729 opt_alignwidth = 80;
730 opt_allpairs_global = nullptr;
731 opt_alnout = nullptr;
732 opt_blast6out = nullptr;
733 opt_biomout = nullptr;
734 opt_borderline = nullptr;
735 opt_bzip2_decompress = false;
736 opt_centroids = nullptr;
737 opt_chimeras = nullptr;
738 opt_cluster_fast = nullptr;
739 opt_cluster_size = nullptr;
740 opt_cluster_smallmem = nullptr;
741 opt_cluster_unoise = nullptr;
742 opt_clusterout_id = false;
743 opt_clusterout_sort = false;
744 opt_clusters = nullptr;
745 opt_cons_truncate = 0;
746 opt_consout = nullptr;
747 opt_cut = nullptr;
748 opt_cut_pattern = nullptr;
749 opt_db = nullptr;
750 opt_dbmask = MASK_DUST;
751 opt_dbmatched = nullptr;
752 opt_dbnotmatched = nullptr;
753 opt_derep_fulllength = nullptr;
754 opt_derep_id = nullptr;
755 opt_derep_prefix = nullptr;
756 opt_dn = 1.4;
757 opt_ee_cutoffs_count = 3;
758 opt_ee_cutoffs_values = (double*) xmalloc(opt_ee_cutoffs_count * sizeof(double));
759 opt_ee_cutoffs_values[0] = 0.5;
760 opt_ee_cutoffs_values[1] = 1.0;
761 opt_ee_cutoffs_values[2] = 2.0;
762 opt_eeout = false;
763 opt_eetabbedout = nullptr;
764 opt_fastaout_notmerged_fwd = nullptr;
765 opt_fastaout_notmerged_rev = nullptr;
766 opt_fasta_score = false;
767 opt_fasta_width = 80;
768 opt_fastaout = nullptr;
769 opt_fastaout_discarded = nullptr;
770 opt_fastaout_discarded_rev = nullptr;
771 opt_fastaout_rev = nullptr;
772 opt_fastapairs = nullptr;
773 opt_fastq_allowmergestagger = false;
774 opt_fastq_ascii = 33;
775 opt_fastq_asciiout = 33;
776 opt_fastq_chars = nullptr;
777 opt_fastq_convert = nullptr;
778 opt_fastq_eeout = false;
779 opt_fastq_eestats = nullptr;
780 opt_fastq_eestats2 = nullptr;
781 opt_fastq_filter = nullptr;
782 opt_fastq_join = nullptr;
783 opt_fastq_maxdiffpct = 100.0;
784 opt_fastq_maxdiffs = 10;
785 opt_fastq_maxee = DBL_MAX;
786 opt_fastq_maxee_rate = DBL_MAX;
787 opt_fastq_maxlen = LONG_MAX;
788 opt_fastq_maxmergelen = 1000000;
789 opt_fastq_maxns = LONG_MAX;
790 opt_fastq_mergepairs = nullptr;
791 opt_fastq_minlen = 1;
792 opt_fastq_minmergelen = 0;
793 opt_fastq_minovlen = 10;
794 opt_fastq_nostagger = true;
795 opt_fastqout_notmerged_fwd = nullptr;
796 opt_fastqout_notmerged_rev = nullptr;
797 opt_fastq_qmax = 41;
798 opt_fastq_qmaxout = 41;
799 opt_fastq_qmin = 0;
800 opt_fastq_qminout = 0;
801 opt_fastq_stats = nullptr;
802 opt_fastq_stripleft = 0;
803 opt_fastq_stripright = 0;
804 opt_fastq_tail = 4;
805 opt_fastq_truncee = DBL_MAX;
806 opt_fastq_trunclen = -1;
807 opt_fastq_trunclen_keep = -1;
808 opt_fastq_truncqual = LONG_MIN;
809 opt_fastqout = nullptr;
810 opt_fastqout_discarded = nullptr;
811 opt_fastqout_discarded_rev = nullptr;
812 opt_fastqout_rev = nullptr;
813 opt_fastx_filter = nullptr;
814 opt_fastx_mask = nullptr;
815 opt_fastx_revcomp = nullptr;
816 opt_fastx_subsample = nullptr;
817 opt_fulldp = 0;
818 opt_gap_extension_query_interior=2;
819 opt_gap_extension_query_left=1;
820 opt_gap_extension_query_right=1;
821 opt_gap_extension_target_interior=2;
822 opt_gap_extension_target_left=1;
823 opt_gap_extension_target_right=1;
824 opt_gap_open_query_interior=20;
825 opt_gap_open_query_left=2;
826 opt_gap_open_query_right=2;
827 opt_gap_open_target_interior=20;
828 opt_gap_open_target_left=2;
829 opt_gap_open_target_right=2;
830 opt_fastx_getseq = nullptr;
831 opt_fastx_getseqs = nullptr;
832 opt_fastx_getsubseq = nullptr;
833 opt_gzip_decompress = false;
834 opt_hardmask = 0;
835 opt_help = 0;
836 opt_id = -1.0;
837 opt_iddef = 2;
838 opt_idprefix = 0;
839 opt_idsuffix = 0;
840 opt_join_padgap = nullptr;
841 opt_join_padgapq = nullptr;
842 opt_label = nullptr;
843 opt_label_substr_match = false;
844 opt_label_suffix = nullptr;
845 opt_labels = nullptr;
846 opt_label_field = nullptr;
847 opt_label_word = nullptr;
848 opt_label_words = nullptr;
849 opt_leftjust = 0;
850 opt_length_cutoffs_increment = 50;
851 opt_length_cutoffs_longest = INT_MAX;
852 opt_length_cutoffs_shortest = 50;
853 opt_log = nullptr;
854 opt_makeudb_usearch = nullptr;
855 opt_maskfasta = nullptr;
856 opt_match = 2;
857 opt_matched = nullptr;
858 opt_max_unmasked_pct = 100.0;
859 opt_maxaccepts = 1;
860 opt_maxdiffs = INT_MAX;
861 opt_maxgaps = INT_MAX;
862 opt_maxhits = 0;
863 opt_maxid = 1.0;
864 opt_maxqsize = INT_MAX;
865 opt_maxqt = DBL_MAX;
866 opt_maxrejects = -1;
867 opt_maxseqlength = 50000;
868 opt_maxsize = LONG_MAX;
869 opt_maxsizeratio = DBL_MAX;
870 opt_maxsl = DBL_MAX;
871 opt_maxsubs = INT_MAX;
872 opt_maxuniquesize = LONG_MAX;
873 opt_mid = 0.0;
874 opt_min_unmasked_pct = 0.0;
875 opt_mincols = 0;
876 opt_mindiffs = 3;
877 opt_mindiv = 0.8;
878 opt_minh = 0.28;
879 opt_minqt = 0.0;
880 opt_minseqlength = -1;
881 opt_minsize = 0;
882 opt_minsizeratio = 0.0;
883 opt_minsl = 0.0;
884 opt_mintsize = 0;
885 opt_minuniquesize = 1;
886 opt_minwordmatches = -1;
887 opt_mismatch = -4;
888 opt_mothur_shared_out = nullptr;
889 opt_msaout = nullptr;
890 opt_no_progress = false;
891 opt_nonchimeras = nullptr;
892 opt_notmatched = nullptr;
893 opt_notmatched = nullptr;
894 opt_notrunclabels = 0;
895 opt_orient = nullptr;
896 opt_otutabout = nullptr;
897 opt_output = nullptr;
898 opt_output_no_hits = 0;
899 opt_pattern = nullptr;
900 opt_profile = nullptr;
901 opt_qmask = MASK_DUST;
902 opt_query_cov = 0.0;
903 opt_quiet = false;
904 opt_randseed = 0;
905 opt_relabel = nullptr;
906 opt_relabel_keep = false;
907 opt_relabel_md5 = false;
908 opt_relabel_self = false;
909 opt_relabel_sha1 = false;
910 opt_rereplicate = nullptr;
911 opt_reverse = nullptr;
912 opt_rightjust = 0;
913 opt_rowlen = 64;
914 opt_samheader = false;
915 opt_samout = nullptr;
916 opt_sample_pct = 0;
917 opt_sample_size = 0;
918 opt_search_exact = nullptr;
919 opt_self = 0;
920 opt_selfid = 0;
921 opt_sff_convert = nullptr;
922 opt_sff_clip = false;
923 opt_shuffle = nullptr;
924 opt_sintax = nullptr;
925 opt_sintax_cutoff = 0.0;
926 opt_sizein = 0;
927 opt_sizeorder = false;
928 opt_sizeout = 0;
929 opt_slots = 0;
930 opt_sortbylength = nullptr;
931 opt_sortbysize = nullptr;
932 opt_strand = 1;
933 opt_subseq_start = 1;
934 opt_subseq_end = LONG_MAX;
935 opt_tabbedout = nullptr;
936 opt_target_cov = 0.0;
937 opt_threads = 0;
938 opt_top_hits_only = 0;
939 opt_topn = LONG_MAX;
940 opt_udb2fasta = nullptr;
941 opt_udbinfo = nullptr;
942 opt_udbstats = nullptr;
943 opt_uc = nullptr;
944 opt_uc_allhits = 0;
945 opt_uchime_denovo = nullptr;
946 opt_uchime2_denovo = nullptr;
947 opt_uchime3_denovo = nullptr;
948 opt_uchime_ref = nullptr;
949 opt_uchimealns = nullptr;
950 opt_uchimeout = nullptr;
951 opt_uchimeout5 = 0;
952 opt_unoise_alpha = 2.0;
953 opt_usearch_global = nullptr;
954 opt_userout = nullptr;
955 opt_usersort = 0;
956 opt_version = 0;
957 opt_weak_id = 10.0;
958 opt_wordlength = 0;
959 opt_xn = 8.0;
960 opt_xsize = false;
961 opt_xee = false;
962
963 opterr = 1;
964
965 enum
966 {
967 option_abskew,
968 option_acceptall,
969 option_alignwidth,
970 option_allpairs_global,
971 option_alnout,
972 option_band,
973 option_biomout,
974 option_blast6out,
975 option_borderline,
976 option_bzip2_decompress,
977 option_centroids,
978 option_chimeras,
979 option_cluster_fast,
980 option_cluster_size,
981 option_cluster_smallmem,
982 option_cluster_unoise,
983 option_clusterout_id,
984 option_clusterout_sort,
985 option_clusters,
986 option_cons_truncate,
987 option_consout,
988 option_cut,
989 option_cut_pattern,
990 option_db,
991 option_dbmask,
992 option_dbmatched,
993 option_dbnotmatched,
994 option_derep_fulllength,
995 option_derep_id,
996 option_derep_prefix,
997 option_dn,
998 option_ee_cutoffs,
999 option_eeout,
1000 option_eetabbedout,
1001 option_fasta_score,
1002 option_fasta_width,
1003 option_fastaout,
1004 option_fastaout_discarded,
1005 option_fastaout_discarded_rev,
1006 option_fastaout_notmerged_fwd,
1007 option_fastaout_notmerged_rev,
1008 option_fastaout_rev,
1009 option_fastapairs,
1010 option_fastq_allowmergestagger,
1011 option_fastq_ascii,
1012 option_fastq_asciiout,
1013 option_fastq_chars,
1014 option_fastq_convert,
1015 option_fastq_eeout,
1016 option_fastq_eestats,
1017 option_fastq_eestats2,
1018 option_fastq_filter,
1019 option_fastq_join,
1020 option_fastq_maxdiffpct,
1021 option_fastq_maxdiffs,
1022 option_fastq_maxee,
1023 option_fastq_maxee_rate,
1024 option_fastq_maxlen,
1025 option_fastq_maxmergelen,
1026 option_fastq_maxns,
1027 option_fastq_mergepairs,
1028 option_fastq_minlen,
1029 option_fastq_minmergelen,
1030 option_fastq_minovlen,
1031 option_fastq_nostagger,
1032 option_fastq_qmax,
1033 option_fastq_qmaxout,
1034 option_fastq_qmin,
1035 option_fastq_qminout,
1036 option_fastq_stats,
1037 option_fastq_stripleft,
1038 option_fastq_stripright,
1039 option_fastq_tail,
1040 option_fastq_truncee,
1041 option_fastq_trunclen,
1042 option_fastq_trunclen_keep,
1043 option_fastq_truncqual,
1044 option_fastqout,
1045 option_fastqout_discarded,
1046 option_fastqout_discarded_rev,
1047 option_fastqout_notmerged_fwd,
1048 option_fastqout_notmerged_rev,
1049 option_fastqout_rev,
1050 option_fastx_filter,
1051 option_fastx_getseq,
1052 option_fastx_getseqs,
1053 option_fastx_getsubseq,
1054 option_fastx_mask,
1055 option_fastx_revcomp,
1056 option_fastx_subsample,
1057 option_fulldp,
1058 option_gapext,
1059 option_gapopen,
1060 option_gzip_decompress,
1061 option_h,
1062 option_hardmask,
1063 option_help,
1064 option_hspw,
1065 option_id,
1066 option_iddef,
1067 option_idprefix,
1068 option_idsuffix,
1069 option_join_padgap,
1070 option_join_padgapq,
1071 option_label,
1072 option_label_field,
1073 option_label_substr_match,
1074 option_label_suffix,
1075 option_label_word,
1076 option_label_words,
1077 option_labels,
1078 option_leftjust,
1079 option_length_cutoffs,
1080 option_log,
1081 option_makeudb_usearch,
1082 option_maskfasta,
1083 option_match,
1084 option_matched,
1085 option_max_unmasked_pct,
1086 option_maxaccepts,
1087 option_maxdiffs,
1088 option_maxgaps,
1089 option_maxhits,
1090 option_maxid,
1091 option_maxqsize,
1092 option_maxqt,
1093 option_maxrejects,
1094 option_maxseqlength,
1095 option_maxsize,
1096 option_maxsizeratio,
1097 option_maxsl,
1098 option_maxsubs,
1099 option_maxuniquesize,
1100 option_mid,
1101 option_min_unmasked_pct,
1102 option_mincols,
1103 option_mindiffs,
1104 option_mindiv,
1105 option_minh,
1106 option_minhsp,
1107 option_minqt,
1108 option_minseqlength,
1109 option_minsize,
1110 option_minsizeratio,
1111 option_minsl,
1112 option_mintsize,
1113 option_minuniquesize,
1114 option_minwordmatches,
1115 option_mismatch,
1116 option_mothur_shared_out,
1117 option_msaout,
1118 option_no_progress,
1119 option_nonchimeras,
1120 option_notmatched,
1121 option_notmatchedfq,
1122 option_notrunclabels,
1123 option_orient,
1124 option_otutabout,
1125 option_output,
1126 option_output_no_hits,
1127 option_pattern,
1128 option_profile,
1129 option_qmask,
1130 option_query_cov,
1131 option_quiet,
1132 option_randseed,
1133 option_relabel,
1134 option_relabel_keep,
1135 option_relabel_md5,
1136 option_relabel_self,
1137 option_relabel_sha1,
1138 option_rereplicate,
1139 option_reverse,
1140 option_rightjust,
1141 option_rowlen,
1142 option_samheader,
1143 option_samout,
1144 option_sample_pct,
1145 option_sample_size,
1146 option_search_exact,
1147 option_self,
1148 option_selfid,
1149 option_sff_clip,
1150 option_sff_convert,
1151 option_shuffle,
1152 option_sintax,
1153 option_sintax_cutoff,
1154 option_sizein,
1155 option_sizeorder,
1156 option_sizeout,
1157 option_slots,
1158 option_sortbylength,
1159 option_sortbysize,
1160 option_strand,
1161 option_subseq_end,
1162 option_subseq_start,
1163 option_tabbedout,
1164 option_target_cov,
1165 option_threads,
1166 option_top_hits_only,
1167 option_topn,
1168 option_uc,
1169 option_uc_allhits,
1170 option_uchime2_denovo,
1171 option_uchime3_denovo,
1172 option_uchime_denovo,
1173 option_uchime_ref,
1174 option_uchimealns,
1175 option_uchimeout,
1176 option_uchimeout5,
1177 option_udb2fasta,
1178 option_udbinfo,
1179 option_udbstats,
1180 option_unoise_alpha,
1181 option_usearch_global,
1182 option_userfields,
1183 option_userout,
1184 option_usersort,
1185 option_v,
1186 option_version,
1187 option_weak_id,
1188 option_wordlength,
1189 option_xdrop_nw,
1190 option_xee,
1191 option_xn,
1192 option_xsize
1193 };
1194
1195 static struct option long_options[] =
1196 {
1197 {"abskew", required_argument, nullptr, 0 },
1198 {"acceptall", no_argument, nullptr, 0 },
1199 {"alignwidth", required_argument, nullptr, 0 },
1200 {"allpairs_global", required_argument, nullptr, 0 },
1201 {"alnout", required_argument, nullptr, 0 },
1202 {"band", required_argument, nullptr, 0 },
1203 {"biomout", required_argument, nullptr, 0 },
1204 {"blast6out", required_argument, nullptr, 0 },
1205 {"borderline", required_argument, nullptr, 0 },
1206 {"bzip2_decompress", no_argument, nullptr, 0 },
1207 {"centroids", required_argument, nullptr, 0 },
1208 {"chimeras", required_argument, nullptr, 0 },
1209 {"cluster_fast", required_argument, nullptr, 0 },
1210 {"cluster_size", required_argument, nullptr, 0 },
1211 {"cluster_smallmem", required_argument, nullptr, 0 },
1212 {"cluster_unoise", required_argument, nullptr, 0 },
1213 {"clusterout_id", no_argument, nullptr, 0 },
1214 {"clusterout_sort", no_argument, nullptr, 0 },
1215 {"clusters", required_argument, nullptr, 0 },
1216 {"cons_truncate", no_argument, nullptr, 0 },
1217 {"consout", required_argument, nullptr, 0 },
1218 {"cut", required_argument, nullptr, 0 },
1219 {"cut_pattern", required_argument, nullptr, 0 },
1220 {"db", required_argument, nullptr, 0 },
1221 {"dbmask", required_argument, nullptr, 0 },
1222 {"dbmatched", required_argument, nullptr, 0 },
1223 {"dbnotmatched", required_argument, nullptr, 0 },
1224 {"derep_fulllength", required_argument, nullptr, 0 },
1225 {"derep_id", required_argument, nullptr, 0 },
1226 {"derep_prefix", required_argument, nullptr, 0 },
1227 {"dn", required_argument, nullptr, 0 },
1228 {"ee_cutoffs", required_argument, nullptr, 0 },
1229 {"eeout", no_argument, nullptr, 0 },
1230 {"eetabbedout", required_argument, nullptr, 0 },
1231 {"fasta_score", no_argument, nullptr, 0 },
1232 {"fasta_width", required_argument, nullptr, 0 },
1233 {"fastaout", required_argument, nullptr, 0 },
1234 {"fastaout_discarded", required_argument, nullptr, 0 },
1235 {"fastaout_discarded_rev",required_argument, nullptr, 0 },
1236 {"fastaout_notmerged_fwd",required_argument, nullptr, 0 },
1237 {"fastaout_notmerged_rev",required_argument, nullptr, 0 },
1238 {"fastaout_rev", required_argument, nullptr, 0 },
1239 {"fastapairs", required_argument, nullptr, 0 },
1240 {"fastq_allowmergestagger", no_argument, nullptr, 0 },
1241 {"fastq_ascii", required_argument, nullptr, 0 },
1242 {"fastq_asciiout", required_argument, nullptr, 0 },
1243 {"fastq_chars", required_argument, nullptr, 0 },
1244 {"fastq_convert", required_argument, nullptr, 0 },
1245 {"fastq_eeout", no_argument, nullptr, 0 },
1246 {"fastq_eestats", required_argument, nullptr, 0 },
1247 {"fastq_eestats2", required_argument, nullptr, 0 },
1248 {"fastq_filter", required_argument, nullptr, 0 },
1249 {"fastq_join", required_argument, nullptr, 0 },
1250 {"fastq_maxdiffpct", required_argument, nullptr, 0 },
1251 {"fastq_maxdiffs", required_argument, nullptr, 0 },
1252 {"fastq_maxee", required_argument, nullptr, 0 },
1253 {"fastq_maxee_rate", required_argument, nullptr, 0 },
1254 {"fastq_maxlen", required_argument, nullptr, 0 },
1255 {"fastq_maxmergelen", required_argument, nullptr, 0 },
1256 {"fastq_maxns", required_argument, nullptr, 0 },
1257 {"fastq_mergepairs", required_argument, nullptr, 0 },
1258 {"fastq_minlen", required_argument, nullptr, 0 },
1259 {"fastq_minmergelen", required_argument, nullptr, 0 },
1260 {"fastq_minovlen", required_argument, nullptr, 0 },
1261 {"fastq_nostagger", no_argument, nullptr, 0 },
1262 {"fastq_qmax", required_argument, nullptr, 0 },
1263 {"fastq_qmaxout", required_argument, nullptr, 0 },
1264 {"fastq_qmin", required_argument, nullptr, 0 },
1265 {"fastq_qminout", required_argument, nullptr, 0 },
1266 {"fastq_stats", required_argument, nullptr, 0 },
1267 {"fastq_stripleft", required_argument, nullptr, 0 },
1268 {"fastq_stripright", required_argument, nullptr, 0 },
1269 {"fastq_tail", required_argument, nullptr, 0 },
1270 {"fastq_truncee", required_argument, nullptr, 0 },
1271 {"fastq_trunclen", required_argument, nullptr, 0 },
1272 {"fastq_trunclen_keep", required_argument, nullptr, 0 },
1273 {"fastq_truncqual", required_argument, nullptr, 0 },
1274 {"fastqout", required_argument, nullptr, 0 },
1275 {"fastqout_discarded", required_argument, nullptr, 0 },
1276 {"fastqout_discarded_rev",required_argument, nullptr, 0 },
1277 {"fastqout_notmerged_fwd",required_argument, nullptr, 0 },
1278 {"fastqout_notmerged_rev",required_argument, nullptr, 0 },
1279 {"fastqout_rev", required_argument, nullptr, 0 },
1280 {"fastx_filter", required_argument, nullptr, 0 },
1281 {"fastx_getseq", required_argument, nullptr, 0 },
1282 {"fastx_getseqs", required_argument, nullptr, 0 },
1283 {"fastx_getsubseq", required_argument, nullptr, 0 },
1284 {"fastx_mask", required_argument, nullptr, 0 },
1285 {"fastx_revcomp", required_argument, nullptr, 0 },
1286 {"fastx_subsample", required_argument, nullptr, 0 },
1287 {"fulldp", no_argument, nullptr, 0 },
1288 {"gapext", required_argument, nullptr, 0 },
1289 {"gapopen", required_argument, nullptr, 0 },
1290 {"gzip_decompress", no_argument, nullptr, 0 },
1291 {"h", no_argument, nullptr, 0 },
1292 {"hardmask", no_argument, nullptr, 0 },
1293 {"help", no_argument, nullptr, 0 },
1294 {"hspw", required_argument, nullptr, 0 },
1295 {"id", required_argument, nullptr, 0 },
1296 {"iddef", required_argument, nullptr, 0 },
1297 {"idprefix", required_argument, nullptr, 0 },
1298 {"idsuffix", required_argument, nullptr, 0 },
1299 {"join_padgap", required_argument, nullptr, 0 },
1300 {"join_padgapq", required_argument, nullptr, 0 },
1301 {"label", required_argument, nullptr, 0 },
1302 {"label_field", required_argument, nullptr, 0 },
1303 {"label_substr_match", no_argument, nullptr, 0 },
1304 {"label_suffix", required_argument, nullptr, 0 },
1305 {"label_word", required_argument, nullptr, 0 },
1306 {"label_words", required_argument, nullptr, 0 },
1307 {"labels", required_argument, nullptr, 0 },
1308 {"leftjust", no_argument, nullptr, 0 },
1309 {"length_cutoffs", required_argument, nullptr, 0 },
1310 {"log", required_argument, nullptr, 0 },
1311 {"makeudb_usearch", required_argument, nullptr, 0 },
1312 {"maskfasta", required_argument, nullptr, 0 },
1313 {"match", required_argument, nullptr, 0 },
1314 {"matched", required_argument, nullptr, 0 },
1315 {"max_unmasked_pct", required_argument, nullptr, 0 },
1316 {"maxaccepts", required_argument, nullptr, 0 },
1317 {"maxdiffs", required_argument, nullptr, 0 },
1318 {"maxgaps", required_argument, nullptr, 0 },
1319 {"maxhits", required_argument, nullptr, 0 },
1320 {"maxid", required_argument, nullptr, 0 },
1321 {"maxqsize", required_argument, nullptr, 0 },
1322 {"maxqt", required_argument, nullptr, 0 },
1323 {"maxrejects", required_argument, nullptr, 0 },
1324 {"maxseqlength", required_argument, nullptr, 0 },
1325 {"maxsize", required_argument, nullptr, 0 },
1326 {"maxsizeratio", required_argument, nullptr, 0 },
1327 {"maxsl", required_argument, nullptr, 0 },
1328 {"maxsubs", required_argument, nullptr, 0 },
1329 {"maxuniquesize", required_argument, nullptr, 0 },
1330 {"mid", required_argument, nullptr, 0 },
1331 {"min_unmasked_pct", required_argument, nullptr, 0 },
1332 {"mincols", required_argument, nullptr, 0 },
1333 {"mindiffs", required_argument, nullptr, 0 },
1334 {"mindiv", required_argument, nullptr, 0 },
1335 {"minh", required_argument, nullptr, 0 },
1336 {"minhsp", required_argument, nullptr, 0 },
1337 {"minqt", required_argument, nullptr, 0 },
1338 {"minseqlength", required_argument, nullptr, 0 },
1339 {"minsize", required_argument, nullptr, 0 },
1340 {"minsizeratio", required_argument, nullptr, 0 },
1341 {"minsl", required_argument, nullptr, 0 },
1342 {"mintsize", required_argument, nullptr, 0 },
1343 {"minuniquesize", required_argument, nullptr, 0 },
1344 {"minwordmatches", required_argument, nullptr, 0 },
1345 {"mismatch", required_argument, nullptr, 0 },
1346 {"mothur_shared_out", required_argument, nullptr, 0 },
1347 {"msaout", required_argument, nullptr, 0 },
1348 {"no_progress", no_argument, nullptr, 0 },
1349 {"nonchimeras", required_argument, nullptr, 0 },
1350 {"notmatched", required_argument, nullptr, 0 },
1351 {"notmatchedfq", required_argument, nullptr, 0 },
1352 {"notrunclabels", no_argument, nullptr, 0 },
1353 {"orient", required_argument, nullptr, 0 },
1354 {"otutabout", required_argument, nullptr, 0 },
1355 {"output", required_argument, nullptr, 0 },
1356 {"output_no_hits", no_argument, nullptr, 0 },
1357 {"pattern", required_argument, nullptr, 0 },
1358 {"profile", required_argument, nullptr, 0 },
1359 {"qmask", required_argument, nullptr, 0 },
1360 {"query_cov", required_argument, nullptr, 0 },
1361 {"quiet", no_argument, nullptr, 0 },
1362 {"randseed", required_argument, nullptr, 0 },
1363 {"relabel", required_argument, nullptr, 0 },
1364 {"relabel_keep", no_argument, nullptr, 0 },
1365 {"relabel_md5", no_argument, nullptr, 0 },
1366 {"relabel_self", no_argument, nullptr, 0 },
1367 {"relabel_sha1", no_argument, nullptr, 0 },
1368 {"rereplicate", required_argument, nullptr, 0 },
1369 {"reverse", required_argument, nullptr, 0 },
1370 {"rightjust", no_argument, nullptr, 0 },
1371 {"rowlen", required_argument, nullptr, 0 },
1372 {"samheader", no_argument, nullptr, 0 },
1373 {"samout", required_argument, nullptr, 0 },
1374 {"sample_pct", required_argument, nullptr, 0 },
1375 {"sample_size", required_argument, nullptr, 0 },
1376 {"search_exact", required_argument, nullptr, 0 },
1377 {"self", no_argument, nullptr, 0 },
1378 {"selfid", no_argument, nullptr, 0 },
1379 {"sff_clip", no_argument, nullptr, 0 },
1380 {"sff_convert", required_argument, nullptr, 0 },
1381 {"shuffle", required_argument, nullptr, 0 },
1382 {"sintax", required_argument, nullptr, 0 },
1383 {"sintax_cutoff", required_argument, nullptr, 0 },
1384 {"sizein", no_argument, nullptr, 0 },
1385 {"sizeorder", no_argument, nullptr, 0 },
1386 {"sizeout", no_argument, nullptr, 0 },
1387 {"slots", required_argument, nullptr, 0 },
1388 {"sortbylength", required_argument, nullptr, 0 },
1389 {"sortbysize", required_argument, nullptr, 0 },
1390 {"strand", required_argument, nullptr, 0 },
1391 {"subseq_end", required_argument, nullptr, 0 },
1392 {"subseq_start", required_argument, nullptr, 0 },
1393 {"tabbedout", required_argument, nullptr, 0 },
1394 {"target_cov", required_argument, nullptr, 0 },
1395 {"threads", required_argument, nullptr, 0 },
1396 {"top_hits_only", no_argument, nullptr, 0 },
1397 {"topn", required_argument, nullptr, 0 },
1398 {"uc", required_argument, nullptr, 0 },
1399 {"uc_allhits", no_argument, nullptr, 0 },
1400 {"uchime2_denovo", required_argument, nullptr, 0 },
1401 {"uchime3_denovo", required_argument, nullptr, 0 },
1402 {"uchime_denovo", required_argument, nullptr, 0 },
1403 {"uchime_ref", required_argument, nullptr, 0 },
1404 {"uchimealns", required_argument, nullptr, 0 },
1405 {"uchimeout", required_argument, nullptr, 0 },
1406 {"uchimeout5", no_argument, nullptr, 0 },
1407 {"udb2fasta", required_argument, nullptr, 0 },
1408 {"udbinfo", required_argument, nullptr, 0 },
1409 {"udbstats", required_argument, nullptr, 0 },
1410 {"unoise_alpha", required_argument, nullptr, 0 },
1411 {"usearch_global", required_argument, nullptr, 0 },
1412 {"userfields", required_argument, nullptr, 0 },
1413 {"userout", required_argument, nullptr, 0 },
1414 {"usersort", no_argument, nullptr, 0 },
1415 {"v", no_argument, nullptr, 0 },
1416 {"version", no_argument, nullptr, 0 },
1417 {"weak_id", required_argument, nullptr, 0 },
1418 {"wordlength", required_argument, nullptr, 0 },
1419 {"xdrop_nw", required_argument, nullptr, 0 },
1420 {"xee", no_argument, nullptr, 0 },
1421 {"xn", required_argument, nullptr, 0 },
1422 {"xsize", no_argument, nullptr, 0 },
1423 { nullptr, 0, nullptr, 0 }
1424 };
1425
1426 const int options_count = (sizeof(long_options) / sizeof(struct option)) - 1;
1427
1428 bool options_selected[options_count];
1429 memset(options_selected, 0, sizeof(options_selected));
1430
1431 int options_index = 0;
1432 int c;
1433
1434 while ((c = getopt_long_only(argc, argv, "", long_options,
1435 &options_index)) == 0)
1436 {
1437 if (options_index < options_count) {
1438 options_selected[options_index] = true;
1439
1440 }
1441
1442 switch(options_index)
1443 {
1444 case option_help:
1445 opt_help = 1;
1446 break;
1447
1448 case option_version:
1449 opt_version = 1;
1450 break;
1451
1452 case option_alnout:
1453 opt_alnout = optarg;
1454 break;
1455
1456 case option_usearch_global:
1457 opt_usearch_global = optarg;
1458 break;
1459
1460 case option_db:
1461 opt_db = optarg;
1462 break;
1463
1464 case option_id:
1465 opt_id = args_getdouble(optarg);
1466 break;
1467
1468 case option_maxaccepts:
1469 opt_maxaccepts = args_getlong(optarg);
1470 break;
1471
1472 case option_maxrejects:
1473 opt_maxrejects = args_getlong(optarg);
1474 break;
1475
1476 case option_wordlength:
1477 opt_wordlength = args_getlong(optarg);
1478 break;
1479
1480 case option_match:
1481 opt_match = args_getlong(optarg);
1482 break;
1483
1484 case option_mismatch:
1485 opt_mismatch = args_getlong(optarg);
1486 break;
1487
1488 case option_fulldp:
1489 opt_fulldp = 1;
1490 fprintf(stderr, "WARNING: Option --fulldp is ignored\n");
1491 break;
1492
1493 case option_strand:
1494 if (strcasecmp(optarg, "plus") == 0) {
1495 opt_strand = 1;
1496 } else if (strcasecmp(optarg, "both") == 0) {
1497 opt_strand = 2;
1498 } else {
1499 fatal("The argument to --strand must be plus or both");
1500
1501 }
1502 break;
1503
1504 case option_threads:
1505 opt_threads = (int64_t) args_getdouble(optarg);
1506 break;
1507
1508 case option_gapopen:
1509 args_get_gap_penalty_string(optarg, 1);
1510 break;
1511
1512 case option_gapext:
1513 args_get_gap_penalty_string(optarg, 0);
1514 break;
1515
1516 case option_rowlen:
1517 opt_rowlen = args_getlong(optarg);
1518 break;
1519
1520 case option_userfields:
1521 if (!parse_userfields_arg(optarg)) {
1522 fatal("Unrecognized userfield argument");
1523
1524 }
1525 break;
1526
1527 case option_userout:
1528 opt_userout = optarg;
1529 break;
1530
1531 case option_self:
1532 opt_self = 1;
1533 break;
1534
1535 case option_blast6out:
1536 opt_blast6out = optarg;
1537 break;
1538
1539 case option_uc:
1540 opt_uc = optarg;
1541 break;
1542
1543 case option_weak_id:
1544 opt_weak_id = args_getdouble(optarg);
1545 break;
1546
1547 case option_uc_allhits:
1548 opt_uc_allhits = 1;
1549 break;
1550
1551 case option_notrunclabels:
1552 opt_notrunclabels = 1;
1553 break;
1554
1555 case option_sortbysize:
1556 opt_sortbysize = optarg;
1557 break;
1558
1559 case option_output:
1560 opt_output = optarg;
1561 break;
1562
1563 case option_minsize:
1564 opt_minsize = args_getlong(optarg);
1565 if (opt_minsize <= 0) {
1566 fatal("The argument to --minsize must be at least 1");
1567
1568 }
1569 break;
1570
1571 case option_maxsize:
1572 opt_maxsize = args_getlong(optarg);
1573 break;
1574
1575 case option_relabel:
1576 opt_relabel = optarg;
1577 break;
1578
1579 case option_sizeout:
1580 opt_sizeout = 1;
1581 break;
1582
1583 case option_derep_fulllength:
1584 opt_derep_fulllength = optarg;
1585 break;
1586
1587 case option_minseqlength:
1588 opt_minseqlength = args_getlong(optarg);
1589 if (opt_minseqlength < 0) {
1590 fatal("The argument to --minseqlength must not be negative");
1591
1592 }
1593 break;
1594
1595 case option_minuniquesize:
1596 opt_minuniquesize = args_getlong(optarg);
1597 break;
1598
1599 case option_topn:
1600 opt_topn = args_getlong(optarg);
1601 break;
1602
1603 case option_maxseqlength:
1604 opt_maxseqlength = args_getlong(optarg);
1605 break;
1606
1607 case option_sizein:
1608 opt_sizein = 1;
1609 break;
1610
1611 case option_sortbylength:
1612 opt_sortbylength = optarg;
1613 break;
1614
1615 case option_matched:
1616 opt_matched = optarg;
1617 break;
1618
1619 case option_notmatched:
1620 opt_notmatched = optarg;
1621 break;
1622
1623 case option_dbmatched:
1624 opt_dbmatched = optarg;
1625 break;
1626
1627 case option_dbnotmatched:
1628 opt_dbnotmatched = optarg;
1629 break;
1630
1631 case option_fastapairs:
1632 opt_fastapairs = optarg;
1633 break;
1634
1635 case option_output_no_hits:
1636 opt_output_no_hits = 1;
1637 break;
1638
1639 case option_maxhits:
1640 opt_maxhits = args_getlong(optarg);
1641 break;
1642
1643 case option_top_hits_only:
1644 opt_top_hits_only = 1;
1645 break;
1646
1647 case option_fasta_width:
1648 opt_fasta_width = args_getlong(optarg);
1649 break;
1650
1651 case option_query_cov:
1652 opt_query_cov = args_getdouble(optarg);
1653 break;
1654
1655 case option_target_cov:
1656 opt_target_cov = args_getdouble(optarg);
1657 break;
1658
1659 case option_idprefix:
1660 opt_idprefix = args_getlong(optarg);
1661 break;
1662
1663 case option_idsuffix:
1664 opt_idsuffix = args_getlong(optarg);
1665 break;
1666
1667 case option_minqt:
1668 opt_minqt = args_getdouble(optarg);
1669 break;
1670
1671 case option_maxqt:
1672 opt_maxqt = args_getdouble(optarg);
1673 break;
1674
1675 case option_minsl:
1676 opt_minsl = args_getdouble(optarg);
1677 break;
1678
1679 case option_maxsl:
1680 opt_maxsl = args_getdouble(optarg);
1681 break;
1682
1683 case option_leftjust:
1684 opt_leftjust = 1;
1685 break;
1686
1687 case option_rightjust:
1688 opt_rightjust = 1;
1689 break;
1690
1691 case option_selfid:
1692 opt_selfid = 1;
1693 break;
1694
1695 case option_maxid:
1696 opt_maxid = args_getdouble(optarg);
1697 break;
1698
1699 case option_minsizeratio:
1700 opt_minsizeratio = args_getdouble(optarg);
1701 break;
1702
1703 case option_maxsizeratio:
1704 opt_maxsizeratio = args_getdouble(optarg);
1705 break;
1706
1707 case option_maxdiffs:
1708 opt_maxdiffs = args_getlong(optarg);
1709 break;
1710
1711 case option_maxsubs:
1712 opt_maxsubs = args_getlong(optarg);
1713 break;
1714
1715 case option_maxgaps:
1716 opt_maxgaps = args_getlong(optarg);
1717 break;
1718
1719 case option_mincols:
1720 opt_mincols = args_getlong(optarg);
1721 break;
1722
1723 case option_maxqsize:
1724 opt_maxqsize = args_getlong(optarg);
1725 break;
1726
1727 case option_mintsize:
1728 opt_mintsize = args_getlong(optarg);
1729 break;
1730
1731 case option_mid:
1732 opt_mid = args_getdouble(optarg);
1733 break;
1734
1735 case option_shuffle:
1736 opt_shuffle = optarg;
1737 break;
1738
1739 case option_randseed:
1740 opt_randseed = args_getlong(optarg);
1741 break;
1742
1743 case option_maskfasta:
1744 opt_maskfasta = optarg;
1745 break;
1746
1747 case option_hardmask:
1748 opt_hardmask = 1;
1749 break;
1750
1751 case option_qmask:
1752 if (strcasecmp(optarg, "none") == 0) {
1753 opt_qmask = MASK_NONE;
1754 } else if (strcasecmp(optarg, "dust") == 0) {
1755 opt_qmask = MASK_DUST;
1756 } else if (strcasecmp(optarg, "soft") == 0) {
1757 opt_qmask = MASK_SOFT;
1758 } else {
1759 opt_qmask = MASK_ERROR;
1760
1761 }
1762 break;
1763
1764 case option_dbmask:
1765 if (strcasecmp(optarg, "none") == 0) {
1766 opt_dbmask = MASK_NONE;
1767 } else if (strcasecmp(optarg, "dust") == 0) {
1768 opt_dbmask = MASK_DUST;
1769 } else if (strcasecmp(optarg, "soft") == 0) {
1770 opt_dbmask = MASK_SOFT;
1771 } else {
1772 opt_dbmask = MASK_ERROR;
1773
1774 }
1775 break;
1776
1777 case option_cluster_smallmem:
1778 opt_cluster_smallmem = optarg;
1779 break;
1780
1781 case option_cluster_fast:
1782 opt_cluster_fast = optarg;
1783 break;
1784
1785 case option_centroids:
1786 opt_centroids = optarg;
1787 break;
1788
1789 case option_clusters:
1790 opt_clusters = optarg;
1791 break;
1792
1793 case option_consout:
1794 opt_consout = optarg;
1795 break;
1796
1797 case option_cons_truncate:
1798 fprintf(stderr, "WARNING: Option --cons_truncate is ignored\n");
1799 opt_cons_truncate = 1;
1800 break;
1801
1802 case option_msaout:
1803 opt_msaout = optarg;
1804 break;
1805
1806 case option_usersort:
1807 opt_usersort = 1;
1808 break;
1809
1810 case option_xn:
1811 opt_xn = args_getdouble(optarg);
1812 break;
1813
1814 case option_iddef:
1815 opt_iddef = args_getlong(optarg);
1816 break;
1817
1818 case option_slots:
1819 fprintf(stderr, "WARNING: Option --slots is ignored\n");
1820 opt_slots = args_getlong(optarg);
1821 break;
1822
1823 case option_pattern:
1824 fprintf(stderr, "WARNING: Option --pattern is ignored\n");
1825 opt_pattern = optarg;
1826 break;
1827
1828 case option_maxuniquesize:
1829 opt_maxuniquesize = args_getlong(optarg);
1830 break;
1831
1832 case option_abskew:
1833 opt_abskew = args_getdouble(optarg);
1834 break;
1835
1836 case option_chimeras:
1837 opt_chimeras = optarg;
1838 break;
1839
1840 case option_dn:
1841 opt_dn = args_getdouble(optarg);
1842 break;
1843
1844 case option_mindiffs:
1845 opt_mindiffs = args_getlong(optarg);
1846 break;
1847
1848 case option_mindiv:
1849 opt_mindiv = args_getdouble(optarg);
1850 break;
1851
1852 case option_minh:
1853 opt_minh = args_getdouble(optarg);
1854 break;
1855
1856 case option_nonchimeras:
1857 opt_nonchimeras = optarg;
1858 break;
1859
1860 case option_uchime_denovo:
1861 opt_uchime_denovo = optarg;
1862 break;
1863
1864 case option_uchime_ref:
1865 opt_uchime_ref = optarg;
1866 break;
1867
1868 case option_uchimealns:
1869 opt_uchimealns = optarg;
1870 break;
1871
1872 case option_uchimeout:
1873 opt_uchimeout = optarg;
1874 break;
1875
1876 case option_uchimeout5:
1877 opt_uchimeout5 = 1;
1878 break;
1879
1880 case option_alignwidth:
1881 opt_alignwidth = args_getlong(optarg);
1882 break;
1883
1884 case option_allpairs_global:
1885 opt_allpairs_global = optarg;
1886 break;
1887
1888 case option_acceptall:
1889 opt_acceptall = 1;
1890 break;
1891
1892 case option_cluster_size:
1893 opt_cluster_size = optarg;
1894 break;
1895
1896 case option_samout:
1897 opt_samout = optarg;
1898 break;
1899
1900 case option_log:
1901 opt_log = optarg;
1902 break;
1903
1904 case option_quiet:
1905 opt_quiet = true;
1906 break;
1907
1908 case option_fastx_subsample:
1909 opt_fastx_subsample = optarg;
1910 break;
1911
1912 case option_sample_pct:
1913 opt_sample_pct = args_getdouble(optarg);
1914 break;
1915
1916 case option_fastq_chars:
1917 opt_fastq_chars = optarg;
1918 break;
1919
1920 case option_profile:
1921 opt_profile = optarg;
1922 break;
1923
1924 case option_sample_size:
1925 opt_sample_size = args_getlong(optarg);
1926 break;
1927
1928 case option_fastaout:
1929 opt_fastaout = optarg;
1930 break;
1931
1932 case option_xsize:
1933 opt_xsize = true;
1934 break;
1935
1936 case option_clusterout_id:
1937 opt_clusterout_id = true;
1938 break;
1939
1940 case option_clusterout_sort:
1941 opt_clusterout_sort = true;
1942 break;
1943
1944 case option_borderline:
1945 opt_borderline = optarg;
1946 break;
1947
1948 case option_relabel_sha1:
1949 opt_relabel_sha1 = true;
1950 break;
1951
1952 case option_relabel_md5:
1953 opt_relabel_md5 = true;
1954 break;
1955
1956 case option_derep_prefix:
1957 opt_derep_prefix = optarg;
1958 break;
1959
1960 case option_fastq_filter:
1961 opt_fastq_filter = optarg;
1962 break;
1963
1964 case option_fastqout:
1965 opt_fastqout = optarg;
1966 break;
1967
1968 case option_fastaout_discarded:
1969 opt_fastaout_discarded = optarg;
1970 break;
1971
1972 case option_fastqout_discarded:
1973 opt_fastqout_discarded = optarg;
1974 break;
1975
1976 case option_fastq_truncqual:
1977 opt_fastq_truncqual = args_getlong(optarg);
1978 break;
1979
1980 case option_fastq_maxee:
1981 opt_fastq_maxee = args_getdouble(optarg);
1982 break;
1983
1984 case option_fastq_trunclen:
1985 opt_fastq_trunclen = args_getlong(optarg);
1986 break;
1987
1988 case option_fastq_minlen:
1989 opt_fastq_minlen = args_getlong(optarg);
1990 break;
1991
1992 case option_fastq_stripleft:
1993 opt_fastq_stripleft = args_getlong(optarg);
1994 break;
1995
1996 case option_fastq_maxee_rate:
1997 opt_fastq_maxee_rate = args_getdouble(optarg);
1998 break;
1999
2000 case option_fastq_maxns:
2001 opt_fastq_maxns = args_getlong(optarg);
2002 break;
2003
2004 case option_eeout:
2005 opt_eeout = true;
2006 break;
2007
2008 case option_fastq_ascii:
2009 opt_fastq_ascii = args_getlong(optarg);
2010 break;
2011
2012 case option_fastq_qmin:
2013 opt_fastq_qmin = args_getlong(optarg);
2014 break;
2015
2016 case option_fastq_qmax:
2017 opt_fastq_qmax = args_getlong(optarg);
2018 break;
2019
2020 case option_fastq_qmaxout:
2021 opt_fastq_qmaxout = args_getlong(optarg);
2022 break;
2023
2024 case option_fastq_stats:
2025 opt_fastq_stats = optarg;
2026 break;
2027
2028 case option_fastq_tail:
2029 opt_fastq_tail = args_getlong(optarg);
2030 break;
2031
2032 case option_fastx_revcomp:
2033 opt_fastx_revcomp = optarg;
2034 break;
2035
2036 case option_label_suffix:
2037 opt_label_suffix = optarg;
2038 break;
2039
2040 case option_h:
2041 opt_help = 1;
2042 break;
2043
2044 case option_samheader:
2045 opt_samheader = true;
2046 break;
2047
2048 case option_sizeorder:
2049 opt_sizeorder = true;
2050 break;
2051
2052 case option_minwordmatches:
2053 opt_minwordmatches = args_getlong(optarg);
2054 if (opt_minwordmatches < 0) {
2055 fatal("The argument to --minwordmatches must not be negative");
2056
2057 }
2058 break;
2059
2060 case option_v:
2061 opt_version = 1;
2062 break;
2063
2064 case option_relabel_keep:
2065 opt_relabel_keep = true;
2066 break;
2067
2068 case option_search_exact:
2069 opt_search_exact = optarg;
2070 break;
2071
2072 case option_fastx_mask:
2073 opt_fastx_mask = optarg;
2074 break;
2075
2076 case option_min_unmasked_pct:
2077 opt_min_unmasked_pct = args_getdouble(optarg);
2078 break;
2079
2080 case option_max_unmasked_pct:
2081 opt_max_unmasked_pct = args_getdouble(optarg);
2082 break;
2083
2084 case option_fastq_convert:
2085 opt_fastq_convert = optarg;
2086 break;
2087
2088 case option_fastq_asciiout:
2089 opt_fastq_asciiout = args_getlong(optarg);
2090 break;
2091
2092 case option_fastq_qminout:
2093 opt_fastq_qminout = args_getlong(optarg);
2094 break;
2095
2096 case option_fastq_mergepairs:
2097 opt_fastq_mergepairs = optarg;
2098 break;
2099
2100 case option_fastq_eeout:
2101 opt_fastq_eeout = true;
2102 break;
2103
2104 case option_fastqout_notmerged_fwd:
2105 opt_fastqout_notmerged_fwd = optarg;
2106 break;
2107
2108 case option_fastqout_notmerged_rev:
2109 opt_fastqout_notmerged_rev = optarg;
2110 break;
2111
2112 case option_fastq_minovlen:
2113 opt_fastq_minovlen = args_getlong(optarg);
2114 break;
2115
2116 case option_fastq_minmergelen:
2117 opt_fastq_minmergelen = args_getlong(optarg);
2118 break;
2119
2120 case option_fastq_maxmergelen:
2121 opt_fastq_maxmergelen = args_getlong(optarg);
2122 break;
2123
2124 case option_fastq_nostagger:
2125 opt_fastq_nostagger = optarg;
2126 break;
2127
2128 case option_fastq_allowmergestagger:
2129 opt_fastq_allowmergestagger = true;
2130 break;
2131
2132 case option_fastq_maxdiffs:
2133 opt_fastq_maxdiffs = args_getlong(optarg);
2134 break;
2135
2136 case option_fastaout_notmerged_fwd:
2137 opt_fastaout_notmerged_fwd = optarg;
2138 break;
2139
2140 case option_fastaout_notmerged_rev:
2141 opt_fastaout_notmerged_rev = optarg;
2142 break;
2143
2144 case option_reverse:
2145 opt_reverse = optarg;
2146 break;
2147
2148 case option_eetabbedout:
2149 opt_eetabbedout = optarg;
2150 break;
2151
2152 case option_fasta_score:
2153 opt_fasta_score = true;
2154 break;
2155
2156 case option_fastq_eestats:
2157 opt_fastq_eestats = optarg;
2158 break;
2159
2160 case option_rereplicate:
2161 opt_rereplicate = optarg;
2162 break;
2163
2164 case option_xdrop_nw:
2165 /* xdrop_nw ignored */
2166 fprintf(stderr, "WARNING: Option --xdrop_nw is ignored\n");
2167 break;
2168
2169 case option_minhsp:
2170 /* minhsp ignored */
2171 fprintf(stderr, "WARNING: Option --minhsp is ignored\n");
2172 break;
2173
2174 case option_band:
2175 /* band ignored */
2176 fprintf(stderr, "WARNING: Option --band is ignored\n");
2177 break;
2178
2179 case option_hspw:
2180 /* hspw ignored */
2181 fprintf(stderr, "WARNING: Option --hspw is ignored\n");
2182 break;
2183
2184 case option_gzip_decompress:
2185 opt_gzip_decompress = true;
2186 break;
2187
2188 case option_bzip2_decompress:
2189 opt_bzip2_decompress = true;
2190 break;
2191
2192 case option_fastq_maxlen:
2193 opt_fastq_maxlen = args_getlong(optarg);
2194 break;
2195
2196 case option_fastq_truncee:
2197 opt_fastq_truncee = args_getdouble(optarg);
2198 break;
2199
2200 case option_fastx_filter:
2201 opt_fastx_filter = optarg;
2202 break;
2203
2204 case option_otutabout:
2205 opt_otutabout = optarg;
2206 break;
2207
2208 case option_mothur_shared_out:
2209 opt_mothur_shared_out = optarg;
2210 break;
2211
2212 case option_biomout:
2213 opt_biomout = optarg;
2214 break;
2215
2216 case option_fastq_trunclen_keep:
2217 opt_fastq_trunclen_keep = args_getlong(optarg);
2218 break;
2219
2220 case option_fastq_stripright:
2221 opt_fastq_stripright = args_getlong(optarg);
2222 break;
2223
2224 case option_no_progress:
2225 opt_no_progress = true;
2226 break;
2227
2228 case option_fastq_eestats2:
2229 opt_fastq_eestats2 = optarg;
2230 break;
2231
2232 case option_ee_cutoffs:
2233 args_get_ee_cutoffs(optarg);
2234 break;
2235
2236 case option_length_cutoffs:
2237 args_get_length_cutoffs(optarg);
2238 break;
2239
2240 case option_makeudb_usearch:
2241 opt_makeudb_usearch = optarg;
2242 break;
2243
2244 case option_udb2fasta:
2245 opt_udb2fasta = optarg;
2246 break;
2247
2248 case option_udbinfo:
2249 opt_udbinfo = optarg;
2250 break;
2251
2252 case option_udbstats:
2253 opt_udbstats = optarg;
2254 break;
2255
2256 case option_cluster_unoise:
2257 opt_cluster_unoise = optarg;
2258 break;
2259
2260 case option_unoise_alpha:
2261 opt_unoise_alpha = args_getdouble(optarg);
2262 break;
2263
2264 case option_uchime2_denovo:
2265 opt_uchime2_denovo = optarg;
2266 break;
2267
2268 case option_uchime3_denovo:
2269 opt_uchime3_denovo = optarg;
2270 break;
2271
2272 case option_sintax:
2273 opt_sintax = optarg;
2274 break;
2275
2276 case option_sintax_cutoff:
2277 opt_sintax_cutoff = args_getdouble(optarg);
2278 break;
2279
2280 case option_tabbedout:
2281 opt_tabbedout = optarg;
2282 break;
2283
2284 case option_fastq_maxdiffpct:
2285 opt_fastq_maxdiffpct = args_getdouble(optarg);
2286 break;
2287
2288 case option_fastq_join:
2289 opt_fastq_join = optarg;
2290 break;
2291
2292 case option_join_padgap:
2293 opt_join_padgap = optarg;
2294 break;
2295
2296 case option_join_padgapq:
2297 opt_join_padgapq = optarg;
2298 break;
2299
2300 case option_sff_convert:
2301 opt_sff_convert = optarg;
2302 break;
2303
2304 case option_sff_clip:
2305 opt_sff_clip = true;
2306 break;
2307
2308 case option_fastaout_rev:
2309 opt_fastaout_rev = optarg;
2310 break;
2311
2312 case option_fastaout_discarded_rev:
2313 opt_fastaout_discarded_rev = optarg;
2314 break;
2315
2316 case option_fastqout_rev:
2317 opt_fastqout_rev = optarg;
2318 break;
2319
2320 case option_fastqout_discarded_rev:
2321 opt_fastqout_discarded_rev = optarg;
2322 break;
2323
2324 case option_xee:
2325 opt_xee = true;
2326 break;
2327
2328 case option_fastx_getseq:
2329 opt_fastx_getseq = optarg;
2330 break;
2331
2332 case option_fastx_getseqs:
2333 opt_fastx_getseqs = optarg;
2334 break;
2335
2336 case option_fastx_getsubseq:
2337 opt_fastx_getsubseq = optarg;
2338 break;
2339
2340 case option_label_substr_match:
2341 opt_label_substr_match = true;
2342 break;
2343
2344 case option_label:
2345 opt_label = optarg;
2346 break;
2347
2348 case option_subseq_start:
2349 opt_subseq_start = args_getlong(optarg);
2350 break;
2351
2352 case option_subseq_end:
2353 opt_subseq_end = args_getlong(optarg);
2354 break;
2355
2356 case option_notmatchedfq:
2357 opt_notmatchedfq = optarg;
2358 break;
2359
2360 case option_label_field:
2361 opt_label_field = optarg;
2362 break;
2363
2364 case option_label_word:
2365 opt_label_word = optarg;
2366 break;
2367
2368 case option_label_words:
2369 opt_label_words = optarg;
2370 break;
2371
2372 case option_labels:
2373 opt_labels = optarg;
2374 break;
2375
2376 case option_cut:
2377 opt_cut = optarg;
2378 break;
2379
2380 case option_cut_pattern:
2381 opt_cut_pattern = optarg;
2382 break;
2383
2384 case option_relabel_self:
2385 opt_relabel_self = true;
2386 break;
2387
2388 case option_derep_id:
2389 opt_derep_id = optarg;
2390 break;
2391
2392 case option_orient:
2393 opt_orient = optarg;
2394 break;
2395
2396 default:
2397 fatal("Internal error in option parsing");
2398 }
2399 }
2400
2401 /* Terminate if ambiguous or illegal options have been detected */
2402 if (c != -1) {
2403 exit(EXIT_FAILURE);
2404
2405 }
2406
2407 /* Terminate after reporting any extra non-option arguments */
2408 if (optind < argc) {
2409 fatal("Unrecognized string on command line (%s)", argv[optind]);
2410
2411 }
2412
2413 /* Below is a list of all command names, in alphabetical order. */
2414
2415 int command_options[] =
2416 {
2417 option_allpairs_global,
2418 option_cluster_fast,
2419 option_cluster_size,
2420 option_cluster_smallmem,
2421 option_cluster_unoise,
2422 option_cut,
2423 option_derep_fulllength,
2424 option_derep_id,
2425 option_derep_prefix,
2426 option_fastq_chars,
2427 option_fastq_convert,
2428 option_fastq_eestats,
2429 option_fastq_eestats2,
2430 option_fastq_filter,
2431 option_fastq_join,
2432 option_fastq_mergepairs,
2433 option_fastq_stats,
2434 option_fastx_filter,
2435 option_fastx_getseq,
2436 option_fastx_getseqs,
2437 option_fastx_getsubseq,
2438 option_fastx_mask,
2439 option_fastx_revcomp,
2440 option_fastx_subsample,
2441 option_h,
2442 option_help,
2443 option_makeudb_usearch,
2444 option_maskfasta,
2445 option_orient,
2446 option_rereplicate,
2447 option_search_exact,
2448 option_sff_convert,
2449 option_shuffle,
2450 option_sintax,
2451 option_sortbylength,
2452 option_sortbysize,
2453 option_uchime2_denovo,
2454 option_uchime3_denovo,
2455 option_uchime_denovo,
2456 option_uchime_ref,
2457 option_udb2fasta,
2458 option_udbinfo,
2459 option_udbstats,
2460 option_usearch_global,
2461 option_v,
2462 option_version
2463 };
2464
2465 const int commands_count = sizeof(command_options) / sizeof(int);
2466
2467 /*
2468 Below is a list of all the options that are valid for each command.
2469 The first line is the command and the lines below are the valid options.
2470 */
2471
2472 const int valid_options[][92] =
2473 {
2474 {
2475 option_allpairs_global,
2476 option_acceptall,
2477 option_alnout,
2478 option_band,
2479 option_blast6out,
2480 option_bzip2_decompress,
2481 option_fasta_width,
2482 option_fastapairs,
2483 option_fulldp,
2484 option_gapext,
2485 option_gapopen,
2486 option_gzip_decompress,
2487 option_hardmask,
2488 option_hspw,
2489 option_id,
2490 option_iddef,
2491 option_idprefix,
2492 option_idsuffix,
2493 option_leftjust,
2494 option_log,
2495 option_match,
2496 option_matched,
2497 option_maxaccepts,
2498 option_maxdiffs,
2499 option_maxgaps,
2500 option_maxhits,
2501 option_maxid,
2502 option_maxqsize,
2503 option_maxqt,
2504 option_maxrejects,
2505 option_maxseqlength,
2506 option_maxsizeratio,
2507 option_maxsl,
2508 option_maxsubs,
2509 option_mid,
2510 option_mincols,
2511 option_minhsp,
2512 option_minqt,
2513 option_minseqlength,
2514 option_minsizeratio,
2515 option_minsl,
2516 option_mintsize,
2517 option_minwordmatches,
2518 option_mismatch,
2519 option_no_progress,
2520 option_notmatched,
2521 option_notrunclabels,
2522 option_output_no_hits,
2523 option_pattern,
2524 option_qmask,
2525 option_query_cov,
2526 option_quiet,
2527 option_relabel,
2528 option_relabel_keep,
2529 option_relabel_md5,
2530 option_relabel_self,
2531 option_relabel_sha1,
2532 option_rightjust,
2533 option_rowlen,
2534 option_samheader,
2535 option_samout,
2536 option_self,
2537 option_selfid,
2538 option_sizein,
2539 option_sizeout,
2540 option_slots,
2541 option_target_cov,
2542 option_threads,
2543 option_top_hits_only,
2544 option_uc,
2545 option_userfields,
2546 option_userout,
2547 option_weak_id,
2548 option_wordlength,
2549 option_xdrop_nw,
2550 option_xee,
2551 option_xsize,
2552 -1 },
2553
2554 { option_cluster_fast,
2555 option_alnout,
2556 option_band,
2557 option_biomout,
2558 option_blast6out,
2559 option_bzip2_decompress,
2560 option_centroids,
2561 option_clusterout_id,
2562 option_clusterout_sort,
2563 option_clusters,
2564 option_cons_truncate,
2565 option_consout,
2566 option_fasta_width,
2567 option_fastapairs,
2568 option_fulldp,
2569 option_gapext,
2570 option_gapopen,
2571 option_gzip_decompress,
2572 option_hardmask,
2573 option_hspw,
2574 option_id,
2575 option_iddef,
2576 option_idprefix,
2577 option_idsuffix,
2578 option_leftjust,
2579 option_log,
2580 option_match,
2581 option_matched,
2582 option_maxaccepts,
2583 option_maxdiffs,
2584 option_maxgaps,
2585 option_maxhits,
2586 option_maxid,
2587 option_maxqsize,
2588 option_maxqt,
2589 option_maxrejects,
2590 option_maxseqlength,
2591 option_maxsizeratio,
2592 option_maxsl,
2593 option_maxsubs,
2594 option_mid,
2595 option_mincols,
2596 option_minhsp,
2597 option_minqt,
2598 option_minseqlength,
2599 option_minsizeratio,
2600 option_minsl,
2601 option_mintsize,
2602 option_minwordmatches,
2603 option_mismatch,
2604 option_mothur_shared_out,
2605 option_msaout,
2606 option_no_progress,
2607 option_notmatched,
2608 option_notrunclabels,
2609 option_otutabout,
2610 option_output_no_hits,
2611 option_pattern,
2612 option_profile,
2613 option_qmask,
2614 option_query_cov,
2615 option_quiet,
2616 option_relabel,
2617 option_relabel_keep,
2618 option_relabel_md5,
2619 option_relabel_self,
2620 option_relabel_sha1,
2621 option_rightjust,
2622 option_rowlen,
2623 option_samheader,
2624 option_samout,
2625 option_self,
2626 option_selfid,
2627 option_sizein,
2628 option_sizeorder,
2629 option_sizeout,
2630 option_slots,
2631 option_strand,
2632 option_target_cov,
2633 option_threads,
2634 option_top_hits_only,
2635 option_uc,
2636 option_userfields,
2637 option_userout,
2638 option_weak_id,
2639 option_wordlength,
2640 option_xdrop_nw,
2641 option_xee,
2642 option_xsize,
2643 -1 },
2644
2645 { option_cluster_size,
2646 option_alnout,
2647 option_band,
2648 option_biomout,
2649 option_blast6out,
2650 option_bzip2_decompress,
2651 option_centroids,
2652 option_clusterout_id,
2653 option_clusterout_sort,
2654 option_clusters,
2655 option_cons_truncate,
2656 option_consout,
2657 option_fasta_width,
2658 option_fastapairs,
2659 option_fulldp,
2660 option_gapext,
2661 option_gapopen,
2662 option_gzip_decompress,
2663 option_hardmask,
2664 option_hspw,
2665 option_id,
2666 option_iddef,
2667 option_idprefix,
2668 option_idsuffix,
2669 option_leftjust,
2670 option_log,
2671 option_match,
2672 option_matched,
2673 option_maxaccepts,
2674 option_maxdiffs,
2675 option_maxgaps,
2676 option_maxhits,
2677 option_maxid,
2678 option_maxqsize,
2679 option_maxqt,
2680 option_maxrejects,
2681 option_maxseqlength,
2682 option_maxsizeratio,
2683 option_maxsl,
2684 option_maxsubs,
2685 option_mid,
2686 option_mincols,
2687 option_minhsp,
2688 option_minqt,
2689 option_minseqlength,
2690 option_minsizeratio,
2691 option_minsl,
2692 option_mintsize,
2693 option_minwordmatches,
2694 option_mismatch,
2695 option_mothur_shared_out,
2696 option_msaout,
2697 option_no_progress,
2698 option_notmatched,
2699 option_notrunclabels,
2700 option_otutabout,
2701 option_output_no_hits,
2702 option_pattern,
2703 option_profile,
2704 option_qmask,
2705 option_query_cov,
2706 option_quiet,
2707 option_relabel,
2708 option_relabel_keep,
2709 option_relabel_md5,
2710 option_relabel_self,
2711 option_relabel_sha1,
2712 option_rightjust,
2713 option_rowlen,
2714 option_samheader,
2715 option_samout,
2716 option_self,
2717 option_selfid,
2718 option_sizein,
2719 option_sizeorder,
2720 option_sizeout,
2721 option_slots,
2722 option_strand,
2723 option_target_cov,
2724 option_threads,
2725 option_top_hits_only,
2726 option_uc,
2727 option_userfields,
2728 option_userout,
2729 option_weak_id,
2730 option_wordlength,
2731 option_xdrop_nw,
2732 option_xee,
2733 option_xsize,
2734 -1 },
2735
2736 { option_cluster_smallmem,
2737 option_alnout,
2738 option_band,
2739 option_biomout,
2740 option_blast6out,
2741 option_bzip2_decompress,
2742 option_centroids,
2743 option_clusterout_id,
2744 option_clusterout_sort,
2745 option_clusters,
2746 option_cons_truncate,
2747 option_consout,
2748 option_fasta_width,
2749 option_fastapairs,
2750 option_fulldp,
2751 option_gapext,
2752 option_gapopen,
2753 option_gzip_decompress,
2754 option_hardmask,
2755 option_hspw,
2756 option_id,
2757 option_iddef,
2758 option_idprefix,
2759 option_idsuffix,
2760 option_leftjust,
2761 option_log,
2762 option_match,
2763 option_matched,
2764 option_maxaccepts,
2765 option_maxdiffs,
2766 option_maxgaps,
2767 option_maxhits,
2768 option_maxid,
2769 option_maxqsize,
2770 option_maxqt,
2771 option_maxrejects,
2772 option_maxseqlength,
2773 option_maxsizeratio,
2774 option_maxsl,
2775 option_maxsubs,
2776 option_mid,
2777 option_mincols,
2778 option_minhsp,
2779 option_minqt,
2780 option_minseqlength,
2781 option_minsizeratio,
2782 option_minsl,
2783 option_mintsize,
2784 option_minwordmatches,
2785 option_mismatch,
2786 option_mothur_shared_out,
2787 option_msaout,
2788 option_no_progress,
2789 option_notmatched,
2790 option_notrunclabels,
2791 option_otutabout,
2792 option_output_no_hits,
2793 option_pattern,
2794 option_profile,
2795 option_qmask,
2796 option_query_cov,
2797 option_quiet,
2798 option_relabel,
2799 option_relabel_keep,
2800 option_relabel_md5,
2801 option_relabel_self,
2802 option_relabel_sha1,
2803 option_rightjust,
2804 option_rowlen,
2805 option_samheader,
2806 option_samout,
2807 option_self,
2808 option_selfid,
2809 option_sizein,
2810 option_sizeorder,
2811 option_sizeout,
2812 option_slots,
2813 option_strand,
2814 option_target_cov,
2815 option_threads,
2816 option_top_hits_only,
2817 option_uc,
2818 option_userfields,
2819 option_userout,
2820 option_usersort,
2821 option_weak_id,
2822 option_wordlength,
2823 option_xdrop_nw,
2824 option_xee,
2825 option_xsize,
2826 -1 },
2827
2828 { option_cluster_unoise,
2829 option_alnout,
2830 option_band,
2831 option_biomout,
2832 option_blast6out,
2833 option_bzip2_decompress,
2834 option_centroids,
2835 option_clusterout_id,
2836 option_clusterout_sort,
2837 option_clusters,
2838 option_cons_truncate,
2839 option_consout,
2840 option_fasta_width,
2841 option_fastapairs,
2842 option_fulldp,
2843 option_gapext,
2844 option_gapopen,
2845 option_gzip_decompress,
2846 option_hardmask,
2847 option_hspw,
2848 option_id,
2849 option_iddef,
2850 option_idprefix,
2851 option_idsuffix,
2852 option_leftjust,
2853 option_log,
2854 option_match,
2855 option_matched,
2856 option_maxaccepts,
2857 option_maxdiffs,
2858 option_maxgaps,
2859 option_maxhits,
2860 option_maxid,
2861 option_maxqsize,
2862 option_maxqt,
2863 option_maxrejects,
2864 option_maxseqlength,
2865 option_maxsizeratio,
2866 option_maxsl,
2867 option_maxsubs,
2868 option_mid,
2869 option_mincols,
2870 option_minhsp,
2871 option_minqt,
2872 option_minseqlength,
2873 option_minsizeratio,
2874 option_minsize,
2875 option_minsl,
2876 option_mintsize,
2877 option_minwordmatches,
2878 option_mismatch,
2879 option_mothur_shared_out,
2880 option_msaout,
2881 option_no_progress,
2882 option_notmatched,
2883 option_notrunclabels,
2884 option_otutabout,
2885 option_output_no_hits,
2886 option_pattern,
2887 option_profile,
2888 option_qmask,
2889 option_query_cov,
2890 option_quiet,
2891 option_relabel,
2892 option_relabel_keep,
2893 option_relabel_md5,
2894 option_relabel_self,
2895 option_relabel_sha1,
2896 option_rightjust,
2897 option_rowlen,
2898 option_samheader,
2899 option_samout,
2900 option_self,
2901 option_selfid,
2902 option_sizein,
2903 option_sizeorder,
2904 option_sizeout,
2905 option_slots,
2906 option_strand,
2907 option_target_cov,
2908 option_threads,
2909 option_top_hits_only,
2910 option_uc,
2911 option_unoise_alpha,
2912 option_userfields,
2913 option_userout,
2914 option_weak_id,
2915 option_wordlength,
2916 option_xdrop_nw,
2917 option_xee,
2918 option_xsize,
2919 -1 },
2920
2921 { option_cut,
2922 option_bzip2_decompress,
2923 option_cut_pattern,
2924 option_fasta_width,
2925 option_fastaout,
2926 option_fastaout_discarded,
2927 option_fastaout_discarded_rev,
2928 option_fastaout_rev,
2929 option_gzip_decompress,
2930 option_log,
2931 option_no_progress,
2932 option_notrunclabels,
2933 option_quiet,
2934 option_relabel,
2935 option_relabel_keep,
2936 option_relabel_md5,
2937 option_relabel_self,
2938 option_relabel_sha1,
2939 option_sizein,
2940 option_sizeout,
2941 option_xee,
2942 option_xsize,
2943 -1 },
2944
2945 { option_derep_fulllength,
2946 option_bzip2_decompress,
2947 option_fasta_width,
2948 option_gzip_decompress,
2949 option_log,
2950 option_maxseqlength,
2951 option_maxuniquesize,
2952 option_minseqlength,
2953 option_minuniquesize,
2954 option_no_progress,
2955 option_notrunclabels,
2956 option_output,
2957 option_quiet,
2958 option_relabel,
2959 option_relabel_keep,
2960 option_relabel_md5,
2961 option_relabel_self,
2962 option_relabel_sha1,
2963 option_sizein,
2964 option_sizeout,
2965 option_strand,
2966 option_threads,
2967 option_topn,
2968 option_uc,
2969 option_xee,
2970 option_xsize,
2971 -1 },
2972
2973 { option_derep_id,
2974 option_bzip2_decompress,
2975 option_fasta_width,
2976 option_gzip_decompress,
2977 option_log,
2978 option_maxseqlength,
2979 option_maxuniquesize,
2980 option_minseqlength,
2981 option_minuniquesize,
2982 option_no_progress,
2983 option_notrunclabels,
2984 option_output,
2985 option_quiet,
2986 option_relabel,
2987 option_relabel_keep,
2988 option_relabel_md5,
2989 option_relabel_self,
2990 option_relabel_sha1,
2991 option_sizein,
2992 option_sizeout,
2993 option_strand,
2994 option_threads,
2995 option_topn,
2996 option_uc,
2997 option_xee,
2998 option_xsize,
2999 -1 },
3000
3001 { option_derep_prefix,
3002 option_bzip2_decompress,
3003 option_fasta_width,
3004 option_gzip_decompress,
3005 option_log,
3006 option_maxseqlength,
3007 option_maxuniquesize,
3008 option_minseqlength,
3009 option_minuniquesize,
3010 option_no_progress,
3011 option_notrunclabels,
3012 option_output,
3013 option_quiet,
3014 option_relabel,
3015 option_relabel_keep,
3016 option_relabel_md5,
3017 option_relabel_self,
3018 option_relabel_sha1,
3019 option_sizein,
3020 option_sizeout,
3021 option_strand,
3022 option_threads,
3023 option_topn,
3024 option_uc,
3025 option_xee,
3026 option_xsize,
3027 -1 },
3028
3029 { option_fastq_chars,
3030 option_bzip2_decompress,
3031 option_fastq_tail,
3032 option_gzip_decompress,
3033 option_log,
3034 option_no_progress,
3035 option_quiet,
3036 option_threads,
3037 -1 },
3038
3039 { option_fastq_convert,
3040 option_bzip2_decompress,
3041 option_fastq_ascii,
3042 option_fastq_asciiout,
3043 option_fastq_qmax,
3044 option_fastq_qmaxout,
3045 option_fastq_qmin,
3046 option_fastq_qminout,
3047 option_fastqout,
3048 option_gzip_decompress,
3049 option_log,
3050 option_no_progress,
3051 option_quiet,
3052 option_relabel,
3053 option_relabel_keep,
3054 option_relabel_md5,
3055 option_relabel_self,
3056 option_relabel_sha1,
3057 option_sizein,
3058 option_sizeout,
3059 option_threads,
3060 option_xee,
3061 option_xsize,
3062 -1 },
3063
3064 { option_fastq_eestats,
3065 option_bzip2_decompress,
3066 option_fastq_ascii,
3067 option_fastq_qmax,
3068 option_fastq_qmin,
3069 option_gzip_decompress,
3070 option_log,
3071 option_no_progress,
3072 option_output,
3073 option_quiet,
3074 option_threads,
3075 -1 },
3076
3077 { option_fastq_eestats2,
3078 option_bzip2_decompress,
3079 option_ee_cutoffs,
3080 option_fastq_ascii,
3081 option_fastq_qmax,
3082 option_fastq_qmin,
3083 option_gzip_decompress,
3084 option_length_cutoffs,
3085 option_log,
3086 option_no_progress,
3087 option_output,
3088 option_quiet,
3089 option_threads,
3090 -1 },
3091
3092 { option_fastq_filter,
3093 option_bzip2_decompress,
3094 option_eeout,
3095 option_fasta_width,
3096 option_fastaout,
3097 option_fastaout_discarded,
3098 option_fastaout_discarded_rev,
3099 option_fastaout_rev,
3100 option_fastq_ascii,
3101 option_fastq_eeout,
3102 option_fastq_maxee,
3103 option_fastq_maxee_rate,
3104 option_fastq_maxlen,
3105 option_fastq_maxns,
3106 option_fastq_minlen,
3107 option_fastq_qmax,
3108 option_fastq_qmin,
3109 option_fastq_stripleft,
3110 option_fastq_stripright,
3111 option_fastq_truncee,
3112 option_fastq_trunclen,
3113 option_fastq_trunclen_keep,
3114 option_fastq_truncqual,
3115 option_fastqout,
3116 option_fastqout_discarded,
3117 option_fastqout_discarded_rev,
3118 option_fastqout_rev,
3119 option_gzip_decompress,
3120 option_log,
3121 option_maxsize,
3122 option_minsize,
3123 option_no_progress,
3124 option_quiet,
3125 option_relabel,
3126 option_relabel_keep,
3127 option_relabel_md5,
3128 option_relabel_self,
3129 option_relabel_sha1,
3130 option_reverse,
3131 option_sizein,
3132 option_sizeout,
3133 option_threads,
3134 option_xee,
3135 option_xsize,
3136 -1 },
3137
3138 { option_fastq_join,
3139 option_bzip2_decompress,
3140 option_fasta_width,
3141 option_fastaout,
3142 option_fastq_ascii,
3143 option_fastq_qmax,
3144 option_fastq_qmin,
3145 option_fastqout,
3146 option_gzip_decompress,
3147 option_join_padgap,
3148 option_join_padgapq,
3149 option_log,
3150 option_no_progress,
3151 option_quiet,
3152 option_relabel,
3153 option_relabel_keep,
3154 option_relabel_md5,
3155 option_relabel_self,
3156 option_relabel_sha1,
3157 option_reverse,
3158 option_sizein,
3159 option_sizeout,
3160 option_threads,
3161 option_xee,
3162 option_xsize,
3163 -1 },
3164
3165 { option_fastq_mergepairs,
3166 option_bzip2_decompress,
3167 option_eeout,
3168 option_eetabbedout,
3169 option_fasta_width,
3170 option_fastaout,
3171 option_fastaout_notmerged_fwd,
3172 option_fastaout_notmerged_rev,
3173 option_fastq_allowmergestagger,
3174 option_fastq_ascii,
3175 option_fastq_eeout,
3176 option_fastq_maxdiffpct,
3177 option_fastq_maxdiffs,
3178 option_fastq_maxee,
3179 option_fastq_maxlen,
3180 option_fastq_maxmergelen,
3181 option_fastq_maxns,
3182 option_fastq_minlen,
3183 option_fastq_minmergelen,
3184 option_fastq_minovlen,
3185 option_fastq_nostagger,
3186 option_fastq_qmax,
3187 option_fastq_qmaxout,
3188 option_fastq_qmin,
3189 option_fastq_qminout,
3190 option_fastq_truncqual,
3191 option_fastqout,
3192 option_fastqout_notmerged_fwd,
3193 option_fastqout_notmerged_rev,
3194 option_gzip_decompress,
3195 option_label_suffix,
3196 option_log,
3197 option_no_progress,
3198 option_quiet,
3199 option_relabel,
3200 option_relabel_keep,
3201 option_relabel_md5,
3202 option_relabel_self,
3203 option_relabel_sha1,
3204 option_reverse,
3205 option_sizein,
3206 option_sizeout,
3207 option_threads,
3208 option_xee,
3209 option_xsize,
3210 -1 },
3211
3212 { option_fastq_stats,
3213 option_bzip2_decompress,
3214 option_fastq_ascii,
3215 option_fastq_qmax,
3216 option_fastq_qmin,
3217 option_gzip_decompress,
3218 option_log,
3219 option_no_progress,
3220 option_output,
3221 option_quiet,
3222 option_threads,
3223 -1 },
3224
3225 { option_fastx_filter,
3226 option_bzip2_decompress,
3227 option_eeout,
3228 option_fasta_width,
3229 option_fastaout,
3230 option_fastaout_discarded,
3231 option_fastaout_discarded_rev,
3232 option_fastaout_rev,
3233 option_fastq_ascii,
3234 option_fastq_eeout,
3235 option_fastq_maxee,
3236 option_fastq_maxee_rate,
3237 option_fastq_maxlen,
3238 option_fastq_maxns,
3239 option_fastq_minlen,
3240 option_fastq_qmax,
3241 option_fastq_qmin,
3242 option_fastq_stripleft,
3243 option_fastq_stripright,
3244 option_fastq_truncee,
3245 option_fastq_trunclen,
3246 option_fastq_trunclen_keep,
3247 option_fastq_truncqual,
3248 option_fastqout,
3249 option_fastqout_discarded,
3250 option_fastqout_discarded_rev,
3251 option_fastqout_rev,
3252 option_gzip_decompress,
3253 option_log,
3254 option_maxsize,
3255 option_minsize,
3256 option_no_progress,
3257 option_notrunclabels,
3258 option_quiet,
3259 option_relabel,
3260 option_relabel_keep,
3261 option_relabel_md5,
3262 option_relabel_self,
3263 option_relabel_sha1,
3264 option_reverse,
3265 option_sizein,
3266 option_sizeout,
3267 option_threads,
3268 option_xee,
3269 option_xsize,
3270 -1 },
3271
3272 { option_fastx_getseq,
3273 option_bzip2_decompress,
3274 option_fasta_width,
3275 option_fastaout,
3276 option_fastq_ascii,
3277 option_fastq_qmax,
3278 option_fastq_qmin,
3279 option_fastqout,
3280 option_gzip_decompress,
3281 option_label,
3282 option_label_substr_match,
3283 option_log,
3284 option_no_progress,
3285 option_notmatched,
3286 option_notmatchedfq,
3287 option_notrunclabels,
3288 option_quiet,
3289 option_relabel,
3290 option_relabel_keep,
3291 option_relabel_md5,
3292 option_relabel_self,
3293 option_relabel_sha1,
3294 option_sizein,
3295 option_sizeout,
3296 option_threads,
3297 option_xee,
3298 option_xsize,
3299 -1 },
3300
3301 { option_fastx_getseqs,
3302 option_bzip2_decompress,
3303 option_fasta_width,
3304 option_fastaout,
3305 option_fastq_ascii,
3306 option_fastq_qmax,
3307 option_fastq_qmin,
3308 option_fastqout,
3309 option_gzip_decompress,
3310 option_label,
3311 option_label_field,
3312 option_label_substr_match,
3313 option_label_word,
3314 option_label_words,
3315 option_labels,
3316 option_log,
3317 option_no_progress,
3318 option_notmatched,
3319 option_notmatchedfq,
3320 option_notrunclabels,
3321 option_quiet,
3322 option_relabel,
3323 option_relabel_keep,
3324 option_relabel_md5,
3325 option_relabel_self,
3326 option_relabel_sha1,
3327 option_sizein,
3328 option_sizeout,
3329 option_threads,
3330 option_xee,
3331 option_xsize,
3332 -1 },
3333
3334 { option_fastx_getsubseq,
3335 option_bzip2_decompress,
3336 option_fasta_width,
3337 option_fastaout,
3338 option_fastq_ascii,
3339 option_fastq_qmax,
3340 option_fastq_qmin,
3341 option_fastqout,
3342 option_gzip_decompress,
3343 option_label,
3344 option_label_substr_match,
3345 option_log,
3346 option_no_progress,
3347 option_notmatched,
3348 option_notmatchedfq,
3349 option_notrunclabels,
3350 option_quiet,
3351 option_relabel,
3352 option_relabel_keep,
3353 option_relabel_md5,
3354 option_relabel_self,
3355 option_relabel_sha1,
3356 option_sizein,
3357 option_sizeout,
3358 option_subseq_end,
3359 option_subseq_start,
3360 option_threads,
3361 option_xee,
3362 option_xsize,
3363 -1 },
3364
3365 { option_fastx_mask,
3366 option_bzip2_decompress,
3367 option_fasta_width,
3368 option_fastaout,
3369 option_fastq_ascii,
3370 option_fastq_qmax,
3371 option_fastq_qmin,
3372 option_fastqout,
3373 option_gzip_decompress,
3374 option_hardmask,
3375 option_log,
3376 option_max_unmasked_pct,
3377 option_min_unmasked_pct,
3378 option_no_progress,
3379 option_notrunclabels,
3380 option_qmask,
3381 option_quiet,
3382 option_relabel,
3383 option_relabel_keep,
3384 option_relabel_md5,
3385 option_relabel_self,
3386 option_relabel_sha1,
3387 option_sizein,
3388 option_sizeout,
3389 option_threads,
3390 option_xee,
3391 option_xsize,
3392 -1 },
3393
3394 { option_fastx_revcomp,
3395 option_bzip2_decompress,
3396 option_fasta_width,
3397 option_fastaout,
3398 option_fastq_ascii,
3399 option_fastq_qmax,
3400 option_fastq_qmin,
3401 option_fastqout,
3402 option_gzip_decompress,
3403 option_label_suffix,
3404 option_log,
3405 option_no_progress,
3406 option_notrunclabels,
3407 option_quiet,
3408 option_relabel,
3409 option_relabel_keep,
3410 option_relabel_md5,
3411 option_relabel_self,
3412 option_relabel_sha1,
3413 option_sizein,
3414 option_sizeout,
3415 option_threads,
3416 option_xee,
3417 option_xsize,
3418 -1 },
3419
3420 { option_fastx_subsample,
3421 option_bzip2_decompress,
3422 option_fasta_width,
3423 option_fastaout,
3424 option_fastaout_discarded,
3425 option_fastq_ascii,
3426 option_fastq_qmax,
3427 option_fastq_qmin,
3428 option_fastqout,
3429 option_fastqout_discarded,
3430 option_gzip_decompress,
3431 option_log,
3432 option_no_progress,
3433 option_notrunclabels,
3434 option_quiet,
3435 option_randseed,
3436 option_relabel,
3437 option_relabel_keep,
3438 option_relabel_md5,
3439 option_relabel_self,
3440 option_relabel_sha1,
3441 option_sample_pct,
3442 option_sample_size,
3443 option_sizein,
3444 option_sizeout,
3445 option_threads,
3446 option_xee,
3447 option_xsize,
3448 -1 },
3449
3450 { option_h,
3451 option_log,
3452 option_quiet,
3453 option_threads,
3454 -1 },
3455
3456 { option_help,
3457 option_log,
3458 option_quiet,
3459 option_threads,
3460 -1 },
3461
3462 { option_makeudb_usearch,
3463 option_bzip2_decompress,
3464 option_dbmask,
3465 option_gzip_decompress,
3466 option_hardmask,
3467 option_log,
3468 option_minseqlength,
3469 option_no_progress,
3470 option_notrunclabels,
3471 option_output,
3472 option_quiet,
3473 option_threads,
3474 option_wordlength,
3475 -1 },
3476
3477 { option_maskfasta,
3478 option_bzip2_decompress,
3479 option_fasta_width,
3480 option_gzip_decompress,
3481 option_hardmask,
3482 option_log,
3483 option_max_unmasked_pct,
3484 option_maxseqlength,
3485 option_min_unmasked_pct,
3486 option_minseqlength,
3487 option_no_progress,
3488 option_notrunclabels,
3489 option_output,
3490 option_qmask,
3491 option_quiet,
3492 option_relabel,
3493 option_relabel_keep,
3494 option_relabel_md5,
3495 option_relabel_self,
3496 option_relabel_sha1,
3497 option_sizein,
3498 option_sizeout,
3499 option_threads,
3500 option_xee,
3501 option_xsize,
3502 -1 },
3503
3504 { option_orient,
3505 option_bzip2_decompress,
3506 option_db,
3507 option_dbmask,
3508 option_fasta_width,
3509 option_fastaout,
3510 option_fastqout,
3511 option_gzip_decompress,
3512 option_log,
3513 option_no_progress,
3514 option_notmatched,
3515 option_notrunclabels,
3516 option_qmask,
3517 option_quiet,
3518 option_relabel,
3519 option_relabel_keep,
3520 option_relabel_md5,
3521 option_relabel_self,
3522 option_relabel_sha1,
3523 option_sizein,
3524 option_sizeout,
3525 option_tabbedout,
3526 option_threads,
3527 option_wordlength,
3528 option_xee,
3529 option_xsize,
3530 -1 },
3531
3532 { option_rereplicate,
3533 option_bzip2_decompress,
3534 option_fasta_width,
3535 option_gzip_decompress,
3536 option_log,
3537 option_no_progress,
3538 option_notrunclabels,
3539 option_output,
3540 option_quiet,
3541 option_relabel,
3542 option_relabel_keep,
3543 option_relabel_md5,
3544 option_relabel_self,
3545 option_relabel_sha1,
3546 option_sizein,
3547 option_sizeout,
3548 option_threads,
3549 option_xee,
3550 option_xsize,
3551 -1 },
3552
3553 { option_search_exact,
3554 option_alnout,
3555 option_biomout,
3556 option_blast6out,
3557 option_bzip2_decompress,
3558 option_db,
3559 option_dbmask,
3560 option_dbmatched,
3561 option_dbnotmatched,
3562 option_fasta_width,
3563 option_fastapairs,
3564 option_gzip_decompress,
3565 option_hardmask,
3566 option_log,
3567 option_match,
3568 option_matched,
3569 option_maxhits,
3570 option_maxqsize,
3571 option_maxqt,
3572 option_maxseqlength,
3573 option_maxsizeratio,
3574 option_maxsl,
3575 option_mincols,
3576 option_minqt,
3577 option_minseqlength,
3578 option_minsizeratio,
3579 option_minsl,
3580 option_mintsize,
3581 option_mismatch,
3582 option_mothur_shared_out,
3583 option_no_progress,
3584 option_notmatched,
3585 option_notrunclabels,
3586 option_otutabout,
3587 option_output_no_hits,
3588 option_qmask,
3589 option_quiet,
3590 option_relabel,
3591 option_relabel_keep,
3592 option_relabel_md5,
3593 option_relabel_self,
3594 option_relabel_sha1,
3595 option_rowlen,
3596 option_samheader,
3597 option_samout,
3598 option_self,
3599 option_sizein,
3600 option_sizeout,
3601 option_strand,
3602 option_threads,
3603 option_top_hits_only,
3604 option_uc,
3605 option_uc_allhits,
3606 option_userfields,
3607 option_userout,
3608 option_xee,
3609 option_xsize,
3610 -1 },
3611
3612 { option_sff_convert,
3613 option_fastq_asciiout,
3614 option_fastq_qmaxout,
3615 option_fastq_qminout,
3616 option_fastqout,
3617 option_log,
3618 option_no_progress,
3619 option_quiet,
3620 option_relabel,
3621 option_relabel_keep,
3622 option_relabel_md5,
3623 option_relabel_self,
3624 option_relabel_sha1,
3625 option_sff_clip,
3626 option_sizeout,
3627 option_threads,
3628 -1 },
3629
3630 { option_shuffle,
3631 option_bzip2_decompress,
3632 option_fasta_width,
3633 option_fastq_ascii,
3634 option_fastq_qmax,
3635 option_fastq_qmin,
3636 option_gzip_decompress,
3637 option_log,
3638 option_maxseqlength,
3639 option_minseqlength,
3640 option_no_progress,
3641 option_notrunclabels,
3642 option_output,
3643 option_quiet,
3644 option_randseed,
3645 option_relabel,
3646 option_relabel_keep,
3647 option_relabel_md5,
3648 option_relabel_self,
3649 option_relabel_sha1,
3650 option_sizein,
3651 option_sizeout,
3652 option_threads,
3653 option_topn,
3654 option_xee,
3655 option_xsize,
3656 -1 },
3657
3658 { option_sintax,
3659 option_bzip2_decompress,
3660 option_db,
3661 option_dbmask,
3662 option_fastq_ascii,
3663 option_fastq_qmax,
3664 option_fastq_qmin,
3665 option_gzip_decompress,
3666 option_log,
3667 option_no_progress,
3668 option_notrunclabels,
3669 option_quiet,
3670 option_sintax_cutoff,
3671 option_strand,
3672 option_tabbedout,
3673 option_threads,
3674 option_wordlength,
3675 -1 },
3676
3677 { option_sortbylength,
3678 option_bzip2_decompress,
3679 option_fasta_width,
3680 option_fastq_ascii,
3681 option_fastq_qmax,
3682 option_fastq_qmin,
3683 option_gzip_decompress,
3684 option_log,
3685 option_maxseqlength,
3686 option_minseqlength,
3687 option_no_progress,
3688 option_notrunclabels,
3689 option_output,
3690 option_quiet,
3691 option_relabel,
3692 option_relabel_keep,
3693 option_relabel_md5,
3694 option_relabel_self,
3695 option_relabel_sha1,
3696 option_sizein,
3697 option_sizeout,
3698 option_threads,
3699 option_topn,
3700 option_xee,
3701 option_xsize,
3702 -1 },
3703
3704 { option_sortbysize,
3705 option_bzip2_decompress,
3706 option_fasta_width,
3707 option_fastq_ascii,
3708 option_fastq_qmax,
3709 option_fastq_qmin,
3710 option_gzip_decompress,
3711 option_log,
3712 option_maxseqlength,
3713 option_maxsize,
3714 option_minseqlength,
3715 option_minsize,
3716 option_no_progress,
3717 option_notrunclabels,
3718 option_output,
3719 option_quiet,
3720 option_relabel,
3721 option_relabel_keep,
3722 option_relabel_md5,
3723 option_relabel_self,
3724 option_relabel_sha1,
3725 option_sizein,
3726 option_sizeout,
3727 option_threads,
3728 option_topn,
3729 option_xee,
3730 option_xsize,
3731 -1 },
3732
3733 { option_uchime2_denovo,
3734 option_abskew,
3735 option_alignwidth,
3736 option_borderline,
3737 option_chimeras,
3738 option_dn,
3739 option_fasta_score,
3740 option_fasta_width,
3741 option_gapext,
3742 option_gapopen,
3743 option_hardmask,
3744 option_log,
3745 option_match,
3746 option_mindiffs,
3747 option_mindiv,
3748 option_minh,
3749 option_mismatch,
3750 option_no_progress,
3751 option_nonchimeras,
3752 option_notrunclabels,
3753 option_qmask,
3754 option_quiet,
3755 option_relabel,
3756 option_relabel_keep,
3757 option_relabel_md5,
3758 option_relabel_self,
3759 option_relabel_sha1,
3760 option_sizein,
3761 option_sizeout,
3762 option_threads,
3763 option_uchimealns,
3764 option_uchimeout,
3765 option_uchimeout5,
3766 option_xee,
3767 option_xn,
3768 option_xsize,
3769 -1 },
3770
3771 { option_uchime3_denovo,
3772 option_abskew,
3773 option_alignwidth,
3774 option_borderline,
3775 option_chimeras,
3776 option_dn,
3777 option_fasta_score,
3778 option_fasta_width,
3779 option_gapext,
3780 option_gapopen,
3781 option_hardmask,
3782 option_log,
3783 option_match,
3784 option_mindiffs,
3785 option_mindiv,
3786 option_minh,
3787 option_mismatch,
3788 option_no_progress,
3789 option_nonchimeras,
3790 option_notrunclabels,
3791 option_qmask,
3792 option_quiet,
3793 option_relabel,
3794 option_relabel_keep,
3795 option_relabel_md5,
3796 option_relabel_self,
3797 option_relabel_sha1,
3798 option_sizein,
3799 option_sizeout,
3800 option_threads,
3801 option_uchimealns,
3802 option_uchimeout,
3803 option_uchimeout5,
3804 option_xee,
3805 option_xn,
3806 option_xsize,
3807 -1 },
3808
3809 { option_uchime_denovo,
3810 option_abskew,
3811 option_alignwidth,
3812 option_borderline,
3813 option_chimeras,
3814 option_dn,
3815 option_fasta_score,
3816 option_fasta_width,
3817 option_gapext,
3818 option_gapopen,
3819 option_hardmask,
3820 option_log,
3821 option_match,
3822 option_mindiffs,
3823 option_mindiv,
3824 option_minh,
3825 option_mismatch,
3826 option_no_progress,
3827 option_nonchimeras,
3828 option_notrunclabels,
3829 option_qmask,
3830 option_quiet,
3831 option_relabel,
3832 option_relabel_keep,
3833 option_relabel_md5,
3834 option_relabel_self,
3835 option_relabel_sha1,
3836 option_sizein,
3837 option_sizeout,
3838 option_threads,
3839 option_uchimealns,
3840 option_uchimeout,
3841 option_uchimeout5,
3842 option_xee,
3843 option_xn,
3844 option_xsize,
3845 -1 },
3846
3847 { option_uchime_ref,
3848 option_abskew,
3849 option_alignwidth,
3850 option_borderline,
3851 option_chimeras,
3852 option_db,
3853 option_dbmask,
3854 option_dn,
3855 option_fasta_score,
3856 option_fasta_width,
3857 option_gapext,
3858 option_gapopen,
3859 option_hardmask,
3860 option_log,
3861 option_match,
3862 option_mindiffs,
3863 option_mindiv,
3864 option_minh,
3865 option_mismatch,
3866 option_no_progress,
3867 option_nonchimeras,
3868 option_notrunclabels,
3869 option_qmask,
3870 option_quiet,
3871 option_relabel,
3872 option_relabel_keep,
3873 option_relabel_md5,
3874 option_relabel_self,
3875 option_relabel_sha1,
3876 option_self,
3877 option_selfid,
3878 option_sizein,
3879 option_sizeout,
3880 option_strand,
3881 option_threads,
3882 option_uchimealns,
3883 option_uchimeout,
3884 option_uchimeout5,
3885 option_xee,
3886 option_xn,
3887 option_xsize,
3888 -1 },
3889
3890 { option_udb2fasta,
3891 option_fasta_width,
3892 option_log,
3893 option_no_progress,
3894 option_output,
3895 option_quiet,
3896 option_relabel,
3897 option_relabel_keep,
3898 option_relabel_md5,
3899 option_relabel_self,
3900 option_relabel_sha1,
3901 option_sizein,
3902 option_sizeout,
3903 option_threads,
3904 option_xee,
3905 option_xsize,
3906 -1 },
3907
3908 { option_udbinfo,
3909 option_log,
3910 option_quiet,
3911 option_threads,
3912 -1 },
3913
3914 { option_udbstats,
3915 option_log,
3916 option_no_progress,
3917 option_quiet,
3918 option_threads,
3919 -1 },
3920
3921 { option_usearch_global,
3922 option_alnout,
3923 option_band,
3924 option_biomout,
3925 option_blast6out,
3926 option_bzip2_decompress,
3927 option_db,
3928 option_dbmask,
3929 option_dbmatched,
3930 option_dbnotmatched,
3931 option_fasta_width,
3932 option_fastapairs,
3933 option_fulldp,
3934 option_gapext,
3935 option_gapopen,
3936 option_gzip_decompress,
3937 option_hardmask,
3938 option_hspw,
3939 option_id,
3940 option_iddef,
3941 option_idprefix,
3942 option_idsuffix,
3943 option_leftjust,
3944 option_log,
3945 option_match,
3946 option_matched,
3947 option_maxaccepts,
3948 option_maxdiffs,
3949 option_maxgaps,
3950 option_maxhits,
3951 option_maxid,
3952 option_maxqsize,
3953 option_maxqt,
3954 option_maxrejects,
3955 option_maxseqlength,
3956 option_maxsizeratio,
3957 option_maxsl,
3958 option_maxsubs,
3959 option_mid,
3960 option_mincols,
3961 option_minhsp,
3962 option_minqt,
3963 option_minseqlength,
3964 option_minsizeratio,
3965 option_minsl,
3966 option_mintsize,
3967 option_minwordmatches,
3968 option_mismatch,
3969 option_mothur_shared_out,
3970 option_no_progress,
3971 option_notmatched,
3972 option_notrunclabels,
3973 option_otutabout,
3974 option_output_no_hits,
3975 option_pattern,
3976 option_qmask,
3977 option_query_cov,
3978 option_quiet,
3979 option_relabel,
3980 option_relabel_keep,
3981 option_relabel_md5,
3982 option_relabel_self,
3983 option_relabel_sha1,
3984 option_rightjust,
3985 option_rowlen,
3986 option_samheader,
3987 option_samout,
3988 option_self,
3989 option_selfid,
3990 option_sizein,
3991 option_sizeout,
3992 option_slots,
3993 option_strand,
3994 option_target_cov,
3995 option_threads,
3996 option_top_hits_only,
3997 option_uc,
3998 option_uc_allhits,
3999 option_userfields,
4000 option_userout,
4001 option_weak_id,
4002 option_wordlength,
4003 option_xdrop_nw,
4004 option_xee,
4005 option_xsize,
4006 -1 },
4007
4008 { option_v,
4009 option_log,
4010 option_quiet,
4011 option_threads,
4012 -1 },
4013
4014 { option_version,
4015 option_log,
4016 option_quiet,
4017 option_threads,
4018 -1 }
4019 };
4020
  /* check that only one command is specified */
4022 int commands = 0;
4023 int k = -1;
4024 for (int i = 0; i < commands_count; i++) {
4025 if (options_selected[command_options[i]])
4026 {
4027 commands++;
4028 k = i;
4029 }
4030
4031 }
4032 if (commands > 1) {
4033 fatal("More than one command specified");
4034
4035 }
4036
4037 /* check that only valid options are specified */
4038 int invalid_options = 0;
4039
4040 if (commands == 0)
4041 {
4042 /* check if any options are specified */
4043 bool any_options = false;
4044 for (bool i
4045 : options_selected) {
4046 if (i) {
4047 any_options = true;
4048
4049 }
4050
4051 }
4052 if (any_options) {
4053 fprintf(stderr, "WARNING: Options given, but no valid command specified.\n");
4054
4055 }
4056 }
4057 else
4058 {
4059 for (int i = 0; i < options_count; i++)
4060 {
4061 if (options_selected[i])
4062 {
4063 int j = 0;
4064 bool ok = false;
4065 while (valid_options[k][j] >= 0)
4066 {
4067 if (valid_options[k][j] == i)
4068 {
4069 ok = true;
4070 break;
4071 }
4072 j++;
4073 }
4074 if (! ok)
4075 {
4076 invalid_options++;
4077
4078 if (invalid_options == 1)
4079 {
4080 fprintf(stderr,
4081 "Fatal error: Invalid options to command %s\n",
4082 long_options[command_options[k]].name);
4083 fprintf(stderr,
4084 "Invalid option(s):");
4085 }
4086 fprintf(stderr, " --%s",
4087 long_options[i].name);
4088 }
4089 }
4090 }
4091
4092 if (invalid_options > 0)
4093 {
4094 fprintf(stderr, "\nThe valid options for the %s command are:",
4095 long_options[command_options[k]].name);
4096 int count = 0;
4097 for(int j = 1; valid_options[k][j] >= 0; j++)
4098 {
4099 fprintf(stderr, " --%s", long_options[valid_options[k][j]].name);
4100 count++;
4101 }
4102 if (! count) {
4103 fprintf(stderr, " (none)");
4104
4105 }
4106 fprintf(stderr, "\n");
4107 exit(EXIT_FAILURE);
4108 }
4109 }
4110
4111 /* multi-threaded commands */
4112
4113 if ((opt_threads < 0) || (opt_threads > 1024)) {
4114 fatal("The argument to --threads must be in the range 0 (default) to 1024");
4115
4116 }
4117
4118 if (opt_allpairs_global || opt_cluster_fast || opt_cluster_size ||
4119 opt_cluster_smallmem || opt_cluster_unoise || opt_fastq_mergepairs ||
4120 opt_fastx_mask || opt_maskfasta || opt_search_exact || opt_sintax ||
4121 opt_uchime_ref || opt_usearch_global)
4122 {
4123 if (opt_threads == 0) {
4124 opt_threads = arch_get_cores();
4125
4126 }
4127 }
4128 else
4129 {
4130 if (opt_threads > 1) {
4131 fprintf(stderr, "WARNING: The %s command does not support multithreading.\nOnly 1 thread used.\n", long_options[command_options[k]].name);
4132
4133 }
4134 opt_threads = 1;
4135 }
4136
4137 if (opt_cluster_unoise) {
4138 opt_weak_id = 0.90;
4139 } else
4140 if (opt_weak_id > opt_id) {
4141 opt_weak_id = opt_id;
4142
4143 }
4144
4145 if (opt_maxrejects == -1)
4146 {
4147 if (opt_cluster_fast) {
4148 opt_maxrejects = 8;
4149 } else {
4150 opt_maxrejects = 32;
4151
4152 }
4153 }
4154
4155 if (opt_maxaccepts < 0) {
4156 fatal("The argument to --maxaccepts must not be negative");
4157
4158 }
4159
4160 if (opt_maxrejects < 0) {
4161 fatal("The argument to --maxrejects must not be negative");
4162
4163 }
4164
4165 if (opt_wordlength == 0)
4166 {
4167 /* set default word length */
4168 if (opt_orient) {
4169 opt_wordlength = 12;
4170 } else {
4171 opt_wordlength = 8;
4172
4173 }
4174 }
4175
4176 if ((opt_wordlength < 3) || (opt_wordlength > 15)) {
4177 fatal("The argument to --wordlength must be in the range 3 to 15");
4178
4179 }
4180
4181 if ((opt_iddef < 0) || (opt_iddef > 4)) {
4182 fatal("The argument to --iddef must in the range 0 to 4");
4183
4184 }
4185
4186 #if 0
4187
4188 if (opt_match <= 0)
4189 fatal("The argument to --match must be positive");
4190
4191 if (opt_mismatch >= 0)
4192 fatal("The argument to --mismatch must be negative");
4193
4194 #endif
4195
4196
4197 if (opt_alignwidth < 0) {
4198 fatal("The argument to --alignwidth must not be negative");
4199
4200 }
4201
4202 if (opt_rowlen < 0) {
4203 fatal("The argument to --rowlen must not be negative");
4204
4205 }
4206
4207 if (opt_qmask == MASK_ERROR) {
4208 fatal("The argument to --qmask must be none, dust or soft");
4209
4210 }
4211
4212 if (opt_dbmask == MASK_ERROR) {
4213 fatal("The argument to --dbmask must be none, dust or soft");
4214
4215 }
4216
4217 if ((opt_sample_pct < 0.0) || (opt_sample_pct > 100.0)) {
4218 fatal("The argument to --sample_pct must be in the range 0.0 to 100.0");
4219
4220 }
4221
4222 if (opt_sample_size < 0) {
4223 fatal("The argument to --sample_size must not be negative");
4224
4225 }
4226
4227 if (((opt_relabel ? 1 : 0) +
4228 opt_relabel_md5 + opt_relabel_self + opt_relabel_sha1) > 1) {
4229 fatal("Specify only one of --relabel, --relabel_self, --relabel_sha1, or --relabel_md5");
4230
4231 }
4232
4233 if (opt_fastq_tail < 1) {
4234 fatal("The argument to --fastq_tail must be positive");
4235
4236 }
4237
4238 if ((opt_min_unmasked_pct < 0.0) && (opt_min_unmasked_pct > 100.0)) {
4239 fatal("The argument to --min_unmasked_pct must be between 0.0 and 100.0");
4240
4241 }
4242
4243 if ((opt_max_unmasked_pct < 0.0) && (opt_max_unmasked_pct > 100.0)) {
4244 fatal("The argument to --max_unmasked_pct must be between 0.0 and 100.0");
4245
4246 }
4247
4248 if (opt_min_unmasked_pct > opt_max_unmasked_pct) {
4249 fatal("The argument to --min_unmasked_pct cannot be larger than to --max_unmasked_pct");
4250
4251 }
4252
4253 if ((opt_fastq_ascii != 33) && (opt_fastq_ascii != 64)) {
4254 fatal("The argument to --fastq_ascii must be 33 or 64");
4255
4256 }
4257
4258 if (opt_fastq_qmin > opt_fastq_qmax) {
4259 fatal("The argument to --fastq_qmin cannot be larger than to --fastq_qmax");
4260
4261 }
4262
4263 if (opt_fastq_ascii + opt_fastq_qmin < 33) {
4264 fatal("Sum of arguments to --fastq_ascii and --fastq_qmin must be no less than 33");
4265
4266 }
4267
4268 if (opt_fastq_ascii + opt_fastq_qmax > 126) {
4269 fatal("Sum of arguments to --fastq_ascii and --fastq_qmax must be no more than 126");
4270
4271 }
4272
4273 if (opt_fastq_qminout > opt_fastq_qmaxout) {
4274 fatal("The argument to --fastq_qminout cannot be larger than to --fastq_qmaxout");
4275
4276 }
4277
4278 if ((opt_fastq_asciiout != 33) && (opt_fastq_asciiout != 64)) {
4279 fatal("The argument to --fastq_asciiout must be 33 or 64");
4280
4281 }
4282
4283 if (opt_fastq_asciiout + opt_fastq_qminout < 33) {
4284 fatal("Sum of arguments to --fastq_asciiout and --fastq_qminout must be no less than 33");
4285
4286 }
4287
4288 if (opt_fastq_asciiout + opt_fastq_qmaxout > 126) {
4289 fatal("Sum of arguments to --fastq_asciiout and --fastq_qmaxout must be no more than 126");
4290
4291 }
4292
4293 if (opt_gzip_decompress && opt_bzip2_decompress) {
4294 fatal("Specify either --gzip_decompress or --bzip2_decompress, not both");
4295
4296 }
4297
4298 if ((opt_sintax_cutoff < 0.0) || (opt_sintax_cutoff > 1.0)) {
4299 fatal("The argument to sintax_cutoff must be in the range 0.0 to 1.0");
4300
4301 }
4302
4303 if (opt_minuniquesize < 1) {
4304 fatal("The argument to minuniquesize must be at least 1");
4305
4306 }
4307
4308 if (opt_maxuniquesize < 1) {
4309 fatal("The argument to maxuniquesize must be at least 1");
4310
4311 }
4312
4313 if (opt_maxsize < 1) {
4314 fatal("The argument to maxsize must be at least 1");
4315
4316 }
4317
4318 if (opt_maxhits < 0) {
4319 fatal("The argument to maxhits cannot be negative");
4320
4321 }
4322
4323
4324 /* TODO: check valid range of gap penalties */
4325
4326 /* adapt/adjust parameters */
4327
4328 #if 1
4329
4330 /*
4331 Adjust gap open penalty according to convention.
4332
4333 The specified gap open penalties include the penalty for
4334 a single nucleotide gap:
4335
4336 gap penalty = gap open penalty + (gap length - 1) * gap extension penalty
4337
4338 The rest of the code assumes the first nucleotide gap penalty is not
4339 included in the gap opening penalty.
4340 */
4341
4342 opt_gap_open_query_left -= opt_gap_extension_query_left;
4343 opt_gap_open_target_left -= opt_gap_extension_target_left;
4344 opt_gap_open_query_interior -= opt_gap_extension_query_interior;
4345 opt_gap_open_target_interior -= opt_gap_extension_target_interior;
4346 opt_gap_open_query_right -= opt_gap_extension_query_right;
4347 opt_gap_open_target_right -= opt_gap_extension_target_right;
4348
4349 #endif
4350
  /* set default parameters, if not specified */
4352
4353 if (opt_maxhits == 0) {
4354 opt_maxhits = LONG_MAX;
4355
4356 }
4357
4358 if (opt_minwordmatches < 0) {
4359 opt_minwordmatches = minwordmatches_defaults[opt_wordlength];
4360
4361 }
4362
4363 /* set default opt_minsize depending on command */
4364 if (opt_minsize == 0)
4365 {
4366 if (opt_cluster_unoise) {
4367 opt_minsize = 8;
4368 } else {
4369 opt_minsize = 1;
4370
4371 }
4372 }
4373
4374 /* set default opt_abskew depending on command */
4375 if (opt_abskew < 0.0)
4376 {
4377 if (opt_uchime3_denovo) {
4378 opt_abskew = 16.0;
4379 } else {
4380 opt_abskew = 2.0;
4381
4382 }
4383 }
4384
4385 /* set default opt_minseqlength depending on command */
4386
4387 if (opt_minseqlength < 0)
4388 {
4389 if (opt_cluster_fast ||
4390 opt_cluster_size ||
4391 opt_cluster_smallmem ||
4392 opt_cluster_unoise ||
4393 opt_derep_fulllength ||
4394 opt_derep_id ||
4395 opt_derep_prefix ||
4396 opt_makeudb_usearch ||
4397 opt_sintax ||
4398 opt_usearch_global) {
4399 opt_minseqlength = 32;
4400 } else {
4401 opt_minseqlength = 1;
4402
4403 }
4404 }
4405
4406 if (opt_sintax) {
4407 opt_notrunclabels = 1;
4408
4409 }
4410 }
4411
/*
  Write the VSEARCH publication reference (authors and year, title,
  journal with DOI), followed by one empty line, to stdout.
*/
void show_publication()
{
  static const char citation[] =
    "Rognes T, Flouri T, Nichols B, Quince C, Mahe F (2016)\n"
    "VSEARCH: a versatile open source tool for metagenomics\n"
    "PeerJ 4:e2584 doi: 10.7717/peerj.2584 https://doi.org/10.7717/peerj.2584\n"
    "\n";

  /* the text holds no conversion specifications, so it is emitted verbatim */
  fputs(citation, stdout);
}
4420
cmd_version()4421 void cmd_version()
4422 {
4423 if (! opt_quiet)
4424 {
4425 show_publication();
4426
4427 #ifdef HAVE_ZLIB_H
4428 printf("Compiled with support for gzip-compressed files,");
4429 if (gz_lib)
4430 {
4431 printf(" and the library is loaded.\n");
4432
4433 char * (*zlibVersion_p)();
4434 zlibVersion_p = (char * (*)()) arch_dlsym(gz_lib,
4435 "zlibVersion");
4436 char * gz_version = (*zlibVersion_p)();
4437 uLong (*zlibCompileFlags_p)();
4438 zlibCompileFlags_p = (uLong (*)()) arch_dlsym(gz_lib,
4439 "zlibCompileFlags");
4440 uLong flags = (*zlibCompileFlags_p)();
4441
4442 printf("zlib version %s, compile flags %lx", gz_version, flags);
4443 if (flags & 0x0400) {
4444 printf(" (ZLIB_WINAPI)");
4445
4446 }
4447 printf("\n");
4448 }
4449 else {
4450 printf(" but the library was not found.\n");
4451
4452 }
4453 #else
4454 printf("Compiled without support for gzip-compressed files.\n");
4455 #endif
4456
4457 #ifdef HAVE_BZLIB_H
4458 printf("Compiled with support for bzip2-compressed files,");
4459 if (bz2_lib) {
4460 printf(" and the library is loaded.\n");
4461 } else {
4462 printf(" but the library was not found.\n");
4463
4464 }
4465 #else
4466 printf("Compiled without support for bzip2-compressed files.\n");
4467 #endif
4468 }
4469 }
4470
cmd_help()4471 void cmd_help()
4472 {
4473 /* 0 1 2 3 4 5 6 7 */
4474 /* 01234567890123456789012345678901234567890123456789012345678901234567890123456789 */
4475
4476 if (! opt_quiet)
4477 {
4478 show_publication();
4479
4480 fprintf(stdout,
4481 "Usage: %s [OPTIONS]\n", progname);
4482
4483 fprintf(stdout,
4484 "\n"
4485 "General options\n"
4486 " --bzip2_decompress decompress input with bzip2 (required if pipe)\n"
4487 " --fasta_width INT width of FASTA seq lines, 0 for no wrap (80)\n"
4488 " --gzip_decompress decompress input with gzip (required if pipe)\n"
4489 " --help | -h display help information\n"
4490 " --log FILENAME write messages, timing and memory info to file\n"
4491 " --maxseqlength INT maximum sequence length (50000)\n"
4492 " --minseqlength INT min seq length (clust/derep/search: 32, other:1)\n"
4493 " --no_progress do not show progress indicator\n"
4494 " --notrunclabels do not truncate labels at first space\n"
4495 " --quiet output just warnings and fatal errors to stderr\n"
4496 " --threads INT number of threads to use, zero for all cores (0)\n"
4497 " --version | -v display version information\n"
4498 "\n"
4499 "Chimera detection\n"
4500 " --uchime_denovo FILENAME detect chimeras de novo\n"
4501 " --uchime2_denovo FILENAME detect chimeras de novo in denoised amplicons\n"
4502 " --uchime3_denovo FILENAME detect chimeras de novo in denoised amplicons\n"
4503 " --uchime_ref FILENAME detect chimeras using a reference database\n"
4504 " Data\n"
4505 " --db FILENAME reference database for --uchime_ref\n"
4506 " Parameters\n"
4507 " --abskew REAL minimum abundance ratio (2.0, 16.0 for uchime3)\n"
4508 " --dn REAL 'no' vote pseudo-count (1.4)\n"
4509 " --mindiffs INT minimum number of differences in segment (3) *\n"
4510 " --mindiv REAL minimum divergence from closest parent (0.8) *\n"
4511 " --minh REAL minimum score (0.28) * ignored in uchime2/3\n"
4512 " --sizein propagate abundance annotation from input\n"
4513 " --self exclude identical labels for --uchime_ref\n"
4514 " --selfid exclude identical sequences for --uchime_ref\n"
4515 " --xn REAL 'no' vote weight (8.0)\n"
4516 " Output\n"
4517 " --alignwidth INT width of alignment in uchimealn output (80)\n"
4518 " --borderline FILENAME output borderline chimeric sequences to file\n"
4519 " --chimeras FILENAME output chimeric sequences to file\n"
4520 " --fasta_score include chimera score in fasta output\n"
4521 " --nonchimeras FILENAME output non-chimeric sequences to file\n"
4522 " --relabel STRING relabel nonchimeras with this prefix string\n"
4523 " --relabel_keep keep the old label after the new when relabelling\n"
4524 " --relabel_md5 relabel with md5 digest of normalized sequence\n"
4525 " --relabel_self relabel with the sequence itself as label\n"
4526 " --relabel_sha1 relabel with sha1 digest of normalized sequence\n"
4527 " --sizeout include abundance information when relabelling\n"
4528 " --uchimealns FILENAME output chimera alignments to file\n"
4529 " --uchimeout FILENAME output to chimera info to tab-separated file\n"
4530 " --uchimeout5 make output compatible with uchime version 5\n"
4531 " --xsize strip abundance information in output\n"
4532 "\n"
4533 "Clustering\n"
4534 " --cluster_fast FILENAME cluster sequences after sorting by length\n"
4535 " --cluster_size FILENAME cluster sequences after sorting by abundance\n"
4536 " --cluster_smallmem FILENAME cluster already sorted sequences (see -usersort)\n"
4537 " --cluster_unoise FILENAME denoise Illumina amplicon reads\n"
4538 " Parameters (most searching options also apply)\n"
4539 " --cons_truncate do not ignore terminal gaps in MSA for consensus\n"
4540 " --id REAL reject if identity lower, accepted values: 0-1.0\n"
4541 " --iddef INT id definition, 0-4=CD-HIT,all,int,MBL,BLAST (2)\n"
4542 " --qmask none|dust|soft mask seqs with dust, soft or no method (dust)\n"
4543 " --sizein propagate abundance annotation from input\n"
4544 " --strand plus|both cluster using plus or both strands (plus)\n"
4545 " --usersort indicate sequences not pre-sorted by length\n"
4546 " --minsize INT minimum abundance (unoise only) (8)\n"
4547 " --unoise_alpha REAL alpha parameter (unoise only) (2.0)\n"
4548 " Output\n"
4549 " --biomout FILENAME filename for OTU table output in biom 1.0 format\n"
4550 " --centroids FILENAME output centroid sequences to FASTA file\n"
4551 " --clusterout_id add cluster id info to consout and profile files\n"
4552 " --clusterout_sort order msaout, consout, profile by decr abundance\n"
4553 " --clusters STRING output each cluster to a separate FASTA file\n"
4554 " --consout FILENAME output cluster consensus sequences to FASTA file\n"
4555 " --mothur_shared_out FN filename for OTU table output in mothur format\n"
4556 " --msaout FILENAME output multiple seq. alignments to FASTA file\n"
4557 " --otutabout FILENAME filename for OTU table output in classic format\n"
4558 " --profile FILENAME output sequence profile of each cluster to file\n"
4559 " --relabel STRING relabel centroids with this prefix string\n"
4560 " --relabel_keep keep the old label after the new when relabelling\n"
4561 " --relabel_md5 relabel with md5 digest of normalized sequence\n"
4562 " --relabel_self relabel with the sequence itself as label\n"
4563 " --relabel_sha1 relabel with sha1 digest of normalized sequence\n"
4564 " --sizeorder sort accepted centroids by abundance, AGC\n"
4565 " --sizeout write cluster abundances to centroid file\n"
4566 " --uc FILENAME specify filename for UCLUST-like output\n"
4567 " --xsize strip abundance information in output\n"
4568 "\n"
4569 "Convert SFF to FASTQ\n"
4570 " --sff_convert FILENAME convert given SFF file to FASTQ format\n"
4571 " Parameters\n"
4572 " --sff_clip clip ends of sequences as indicated in file (no)\n"
4573 " --fastq_asciiout INT FASTQ output quality score ASCII base char (33)\n"
4574 " --fastq_qmaxout INT maximum base quality value for FASTQ output (41)\n"
4575 " --fastq_qminout INT minimum base quality value for FASTQ output (0)\n"
4576 " Output\n"
4577 " --fastqout FILENAME output converted sequences to given FASTQ file\n"
4578 "\n"
4579 "Dereplication and rereplication\n"
4580 " --derep_fulllength FILENAME dereplicate sequences in the given FASTA file\n"
4581 " --derep_id FILENAME dereplicate using both identifiers and sequences\n"
4582 " --derep_prefix FILENAME dereplicate sequences in file based on prefixes\n"
4583 " --rereplicate FILENAME rereplicate sequences in the given FASTA file\n"
4584 " Parameters\n"
4585 " --maxuniquesize INT maximum abundance for output from dereplication\n"
4586 " --minuniquesize INT minimum abundance for output from dereplication\n"
4587 " --sizein propagate abundance annotation from input\n"
4588 " --strand plus|both dereplicate plus or both strands (plus)\n"
4589 " Output\n"
4590 " --output FILENAME output FASTA file\n"
4591 " --relabel STRING relabel with this prefix string\n"
4592 " --relabel_keep keep the old label after the new when relabelling\n"
4593 " --relabel_md5 relabel with md5 digest of normalized sequence\n"
4594 " --relabel_self relabel with the sequence itself as label\n"
4595 " --relabel_sha1 relabel with sha1 digest of normalized sequence\n"
4596 " --sizeout write abundance annotation to output\n"
4597 " --topn INT output only n most abundant sequences after derep\n"
4598 " --uc FILENAME filename for UCLUST-like dereplication output\n"
4599 " --xsize strip abundance information in derep output\n"
4600 "\n"
4601 "FASTQ format conversion\n"
4602 " --fastq_convert FILENAME convert between FASTQ file formats\n"
4603 " Parameters\n"
4604 " --fastq_ascii INT FASTQ input quality score ASCII base char (33)\n"
4605 " --fastq_asciiout INT FASTQ output quality score ASCII base char (33)\n"
4606 " --fastq_qmax INT maximum base quality value for FASTQ input (41)\n"
4607 " --fastq_qmaxout INT maximum base quality value for FASTQ output (41)\n"
4608 " --fastq_qmin INT minimum base quality value for FASTQ input (0)\n"
4609 " --fastq_qminout INT minimum base quality value for FASTQ output (0)\n"
4610 " Output\n"
4611 " --fastqout FILENAME FASTQ output filename for converted sequences\n"
4612 "\n"
4613 "FASTQ format detection and quality analysis\n"
4614 " --fastq_chars FILENAME analyse FASTQ file for version and quality range\n"
4615 " Parameters\n"
4616 " --fastq_tail INT min length of tails to count for fastq_chars (4)\n"
4617 "\n"
4618 "FASTQ quality statistics\n"
4619 " --fastq_stats FILENAME report statistics on FASTQ file\n"
4620 " --fastq_eestats FILENAME quality score and expected error statistics\n"
4621 " --fastq_eestats2 FILENAME expected error and length cutoff statistics\n"
4622 " Parameters\n"
4623 " --ee_cutoffs REAL,... fastq_eestats2 expected error cutoffs (0.5,1,2)\n"
4624 " --fastq_ascii INT FASTQ input quality score ASCII base char (33)\n"
4625 " --fastq_qmax INT maximum base quality value for FASTQ input (41)\n"
4626 " --fastq_qmin INT minimum base quality value for FASTQ input (0)\n"
4627 " --length_cutoffs INT,INT,INT fastq_eestats2 length (min,max,incr) (50,*,50)\n"
4628 " Output\n"
4629 " --log FILENAME output file for fastq_stats statistics\n"
4630 " --output FILENAME output file for fastq_eestats(2) statistics\n"
4631 "\n"
4632 "Masking (new)\n"
4633 " --fastx_mask FILENAME mask sequences in the given FASTA or FASTQ file\n"
4634 " Parameters\n"
4635 " --fastq_ascii INT FASTQ input quality score ASCII base char (33)\n"
4636 " --fastq_qmax INT maximum base quality value for FASTQ input (41)\n"
4637 " --fastq_qmin INT minimum base quality value for FASTQ input (0)\n"
4638 " --hardmask mask by replacing with N instead of lower case\n"
4639 " --max_unmasked_pct max unmasked %% of sequences to keep (100.0)\n"
4640 " --min_unmasked_pct min unmasked %% of sequences to keep (0.0)\n"
4641 " --qmask none|dust|soft mask seqs with dust, soft or no method (dust)\n"
4642 " Output\n"
4643 " --fastaout FILENAME output to specified FASTA file\n"
4644 " --fastqout FILENAME output to specified FASTQ file\n"
4645 "\n"
4646 "Masking (old)\n"
4647 " --maskfasta FILENAME mask sequences in the given FASTA file\n"
4648 " Parameters\n"
4649 " --hardmask mask by replacing with N instead of lower case\n"
4650 " --qmask none|dust|soft mask seqs with dust, soft or no method (dust)\n"
4651 " Output\n"
4652 " --output FILENAME output to specified FASTA file\n"
4653 "\n"
4654 "Orient sequences in forward or reverse direction\n"
4655 " --orient FILENAME orient sequences in given FASTA/FASTQ file\n"
4656 " Data\n"
4657 " --db FILENAME database of sequences in correct orientation\n"
4658 " --dbmask none|dust|soft mask db seqs with dust, soft or no method (dust)\n"
4659 " --qmask none|dust|soft mask query with dust, soft or no method (dust)\n"
4660 " --wordlength INT length of words used for matching 3-15 (12)\n"
4661 " Output\n"
4662 " --fastaout FILENAME FASTA output filename for oriented sequences\n"
4663 " --fastqout FILENAME FASTQ output filenamr for oriented sequences\n"
4664 " --notmatched FILENAME output filename for undetermined sequences\n"
4665 " --tabbedout FILENAME output filename for result information\n"
4666 "\n"
4667 "Paired-end reads joining\n"
4668 " --fastq_join FILENAME join paired-end reads into one sequence with gap\n"
4669 " Data\n"
4670 " --reverse FILENAME specify FASTQ file with reverse reads\n"
4671 " --join_padgap STRING sequence string used for padding (NNNNNNNN)\n"
4672 " --join_padgapq STRING quality string used for padding (IIIIIIII)\n"
4673 " Output\n"
4674 " --fastaout FILENAME FASTA output filename for joined sequences\n"
4675 " --fastqout FILENAME FASTQ output filename for joined sequences\n"
4676 "\n"
4677 "Paired-end reads merging\n"
4678 " --fastq_mergepairs FILENAME merge paired-end reads into one sequence\n"
4679 " Data\n"
4680 " --reverse FILENAME specify FASTQ file with reverse reads\n"
4681 " Parameters\n"
4682 " --fastq_allowmergestagger allow merging of staggered reads\n"
4683 " --fastq_ascii INT FASTQ input quality score ASCII base char (33)\n"
4684 " --fastq_maxdiffpct REAL maximum percentage diff. bases in overlap (100.0)\n"
4685 " --fastq_maxdiffs INT maximum number of different bases in overlap (10)\n"
4686 " --fastq_maxee REAL maximum expected error value for merged sequence\n"
4687 " --fastq_maxmergelen maximum length of entire merged sequence\n"
4688 " --fastq_maxns INT maximum number of N's\n"
4689 " --fastq_minlen INT minimum input read length after truncation (1)\n"
4690 " --fastq_minmergelen minimum length of entire merged sequence\n"
4691 " --fastq_minovlen minimum length of overlap between reads (10)\n"
4692 " --fastq_nostagger disallow merging of staggered reads (default)\n"
4693 " --fastq_qmax INT maximum base quality value for FASTQ input (41)\n"
4694 " --fastq_qmaxout INT maximum base quality value for FASTQ output (41)\n"
4695 " --fastq_qmin INT minimum base quality value for FASTQ input (0)\n"
4696 " --fastq_qminout INT minimum base quality value for FASTQ output (0)\n"
4697 " --fastq_truncqual INT base quality value for truncation\n"
4698 " Output\n"
4699 " --eetabbedout FILENAME output error statistics to specified file\n"
4700 " --fastaout FILENAME FASTA output filename for merged sequences\n"
4701 " --fastaout_notmerged_fwd FN FASTA filename for non-merged forward sequences\n"
4702 " --fastaout_notmerged_rev FN FASTA filename for non-merged reverse sequences\n"
4703 " --fastq_eeout include expected errors (ee) in FASTQ output\n"
4704 " --fastqout FILENAME FASTQ output filename for merged sequences\n"
4705 " --fastqout_notmerged_fwd FN FASTQ filename for non-merged forward sequences\n"
4706 " --fastqout_notmerged_rev FN FASTQ filename for non-merged reverse sequences\n"
4707 " --label_suffix STRING suffix to append to label of merged sequences\n"
4708 " --xee remove expected errors (ee) info from output\n"
4709 "\n"
4710 "Pairwise alignment\n"
4711 " --allpairs_global FILENAME perform global alignment of all sequence pairs\n"
4712 " Output (most searching options also apply)\n"
4713 " --alnout FILENAME filename for human-readable alignment output\n"
4714 " --acceptall output all pairwise alignments\n"
4715 "\n"
4716 "Restriction site cutting\n"
4717 " --cut FILENAME filename of FASTA formatted input sequences\n"
4718 " Parameters\n"
4719 " --cut_pattern STRING pattern to match with ^ and _ at cut sites\n"
4720 " Output\n"
4721 " --fastaout FILENAME FASTA filename for fragments on forward strand\n"
4722 " --fastaout_rev FILENAME FASTA filename for fragments on reverse strand\n"
4723 " --fastaout_discarded FN FASTA filename for non-matching sequences\n"
4724 " --fastaout_discarded_rev FN FASTA filename for non-matching, reverse compl.\n"
4725 "\n"
4726 "Reverse complementation\n"
4727 " --fastx_revcomp FILENAME reverse-complement seqs in FASTA or FASTQ file\n"
4728 " Parameters\n"
4729 " --fastq_ascii INT FASTQ input quality score ASCII base char (33)\n"
4730 " --fastq_qmax INT maximum base quality value for FASTQ input (41)\n"
4731 " --fastq_qmin INT minimum base quality value for FASTQ input (0)\n"
4732 " Output\n"
4733 " --fastaout FILENAME FASTA output filename\n"
4734 " --fastqout FILENAME FASTQ output filename\n"
4735 " --label_suffix STRING label to append to identifier in the output\n"
4736 "\n"
4737 "Searching\n"
4738 " --search_exact FILENAME filename of queries for exact match search\n"
4739 " --usearch_global FILENAME filename of queries for global alignment search\n"
4740 " Data\n"
4741 " --db FILENAME name of UDB or FASTA database for search\n"
4742 " Parameters\n"
4743 " --dbmask none|dust|soft mask db with dust, soft or no method (dust)\n"
4744 " --fulldp full dynamic programming alignment (always on)\n"
4745 " --gapext STRING penalties for gap extension (2I/1E)\n"
4746 " --gapopen STRING penalties for gap opening (20I/2E)\n"
4747 " --hardmask mask by replacing with N instead of lower case\n"
4748 " --id REAL reject if identity lower\n"
4749 " --iddef INT id definition, 0-4=CD-HIT,all,int,MBL,BLAST (2)\n"
4750 " --idprefix INT reject if first n nucleotides do not match\n"
4751 " --idsuffix INT reject if last n nucleotides do not match\n"
4752 " --leftjust reject if terminal gaps at alignment left end\n"
4753 " --match INT score for match (2)\n"
4754 " --maxaccepts INT number of hits to accept and show per strand (1)\n"
4755 " --maxdiffs INT reject if more substitutions or indels\n"
4756 " --maxgaps INT reject if more indels\n"
4757 " --maxhits INT maximum number of hits to show (unlimited)\n"
4758 " --maxid REAL reject if identity higher\n"
4759 " --maxqsize INT reject if query abundance larger\n"
4760 " --maxqt REAL reject if query/target length ratio higher\n"
4761 " --maxrejects INT number of non-matching hits to consider (32)\n"
4762 " --maxsizeratio REAL reject if query/target abundance ratio higher\n"
4763 " --maxsl REAL reject if shorter/longer length ratio higher\n"
4764 " --maxsubs INT reject if more substitutions\n"
4765 " --mid REAL reject if percent identity lower, ignoring gaps\n"
4766 " --mincols INT reject if alignment length shorter\n"
4767 " --minqt REAL reject if query/target length ratio lower\n"
4768 " --minsizeratio REAL reject if query/target abundance ratio lower\n"
4769 " --minsl REAL reject if shorter/longer length ratio lower\n"
4770 " --mintsize INT reject if target abundance lower\n"
4771 " --minwordmatches INT minimum number of word matches required (12)\n"
4772 " --mismatch INT score for mismatch (-4)\n"
4773 " --pattern STRING option is ignored\n"
4774 " --qmask none|dust|soft mask query with dust, soft or no method (dust)\n"
4775 " --query_cov REAL reject if fraction of query seq. aligned lower\n"
4776 " --rightjust reject if terminal gaps at alignment right end\n"
4777 " --sizein propagate abundance annotation from input\n"
4778 " --self reject if labels identical\n"
4779 " --selfid reject if sequences identical\n"
4780 " --slots INT option is ignored\n"
4781 " --strand plus|both search plus or both strands (plus)\n"
4782 " --target_cov REAL reject if fraction of target seq. aligned lower\n"
4783 " --weak_id REAL include aligned hits with >= id; continue search\n"
4784 " --wordlength INT length of words for database index 3-15 (8)\n"
4785 " Output\n"
4786 " --alnout FILENAME filename for human-readable alignment output\n"
4787 " --biomout FILENAME filename for OTU table output in biom 1.0 format\n"
4788 " --blast6out FILENAME filename for blast-like tab-separated output\n"
4789 " --dbmatched FILENAME FASTA file for matching database sequences\n"
4790 " --dbnotmatched FILENAME FASTA file for non-matching database sequences\n"
4791 " --fastapairs FILENAME FASTA file with pairs of query and target\n"
4792 " --matched FILENAME FASTA file for matching query sequences\n"
4793 " --mothur_shared_out FN filename for OTU table output in mothur format\n"
4794 " --notmatched FILENAME FASTA file for non-matching query sequences\n"
4795 " --otutabout FILENAME filename for OTU table output in classic format\n"
4796 " --output_no_hits output non-matching queries to output files\n"
4797 " --rowlen INT width of alignment lines in alnout output (64)\n"
4798 " --samheader include a header in the SAM output file\n"
4799 " --samout FILENAME filename for SAM format output\n"
4800 " --sizeout write abundance annotation to dbmatched file\n"
4801 " --top_hits_only output only hits with identity equal to the best\n"
4802 " --uc FILENAME filename for UCLUST-like output\n"
4803 " --uc_allhits show all, not just top hit with uc output\n"
4804 " --userfields STRING fields to output in userout file\n"
4805 " --userout FILENAME filename for user-defined tab-separated output\n"
4806 "\n"
4807 "Shuffling and sorting\n"
4808 " --shuffle FILENAME shuffle order of sequences in FASTA file randomly\n"
4809 " --sortbylength FILENAME sort sequences by length in given FASTA file\n"
4810 " --sortbysize FILENAME abundance sort sequences in given FASTA file\n"
4811 " Parameters\n"
4812 " --maxsize INT maximum abundance for sortbysize\n"
4813 " --minsize INT minimum abundance for sortbysize\n"
4814 " --randseed INT seed for PRNG, zero to use random data source (0)\n"
4815 " --sizein propagate abundance annotation from input\n"
4816 " Output\n"
4817 " --output FILENAME output to specified FASTA file\n"
4818 " --relabel STRING relabel sequences with this prefix string\n"
4819 " --relabel_keep keep the old label after the new when relabelling\n"
4820 " --relabel_md5 relabel with md5 digest of normalized sequence\n"
4821 " --relabel_self relabel with the sequence itself as label\n"
4822 " --relabel_sha1 relabel with sha1 digest of normalized sequence\n"
4823 " --sizeout include abundance information when relabelling\n"
4824 " --topn INT output just first n sequences\n"
4825 " --xsize strip abundance information in output\n"
4826 "\n"
4827 "Subsampling\n"
4828 " --fastx_subsample FILENAME subsample sequences from given FASTA/FASTQ file\n"
4829 " Parameters\n"
4830 " --fastq_ascii INT FASTQ input quality score ASCII base char (33)\n"
4831 " --fastq_qmax INT maximum base quality value for FASTQ input (41)\n"
4832 " --fastq_qmin INT minimum base quality value for FASTQ input (0)\n"
4833 " --randseed INT seed for PRNG, zero to use random data source (0)\n"
4834 " --sample_pct REAL sampling percentage between 0.0 and 100.0\n"
4835 " --sample_size INT sampling size\n"
4836 " --sizein consider abundance info from input, do not ignore\n"
4837 " Output\n"
4838 " --fastaout FILENAME output subsampled sequences to FASTA file\n"
4839 " --fastaout_discarded FILE output non-subsampled sequences to FASTA file\n"
4840 " --fastqout FILENAME output subsampled sequences to FASTQ file\n"
4841 " --fastqout_discarded output non-subsampled sequences to FASTQ file\n"
4842 " --relabel STRING relabel sequences with this prefix string\n"
4843 " --relabel_keep keep the old label after the new when relabelling\n"
4844 " --relabel_md5 relabel with md5 digest of normalized sequence\n"
4845 " --relabel_self relabel with the sequence itself as label\n"
4846 " --relabel_sha1 relabel with sha1 digest of normalized sequence\n"
4847 " --sizeout update abundance information in output\n"
4848 " --xsize strip abundance information in output\n"
4849 "\n"
4850 "Taxonomic classification\n"
4851 " --sintax FILENAME classify sequences in given FASTA/FASTQ file\n"
4852 " Parameters\n"
4853 " --db FILENAME taxonomic reference db in given FASTA or UDB file\n"
4854 " --sintax_cutoff REAL confidence value cutoff level (0.0)\n"
4855 " Output\n"
4856 " --tabbedout FILENAME write results to given tab-delimited file\n"
4857 "\n"
4858 "Trimming and filtering\n"
4859 " --fastx_filter FILENAME trim and filter sequences in FASTA/FASTQ file\n"
4860 " --fastq_filter FILENAME trim and filter sequences in FASTQ file\n"
4861 " --reverse FILENAME FASTQ file with other end of paired-end reads\n"
4862 " Parameters\n"
4863 " --fastq_ascii INT FASTQ input quality score ASCII base char (33)\n"
4864 " --fastq_maxee REAL discard if expected error value is higher\n"
4865 " --fastq_maxee_rate REAL discard if expected error rate is higher\n"
4866 " --fastq_maxlen INT discard if length of sequence is longer\n"
4867 " --fastq_maxns INT discard if number of N's is higher\n"
4868 " --fastq_minlen INT discard if length of sequence is shorter\n"
4869 " --fastq_qmax INT maximum base quality value for FASTQ input (41)\n"
4870 " --fastq_qmin INT minimum base quality value for FASTQ input (0)\n"
4871 " --fastq_stripleft INT delete given number of bases from the 5' end\n"
4872 " --fastq_stripright INT delete given number of bases from the 3' end\n"
4873 " --fastq_truncee REAL truncate to given maximum expected error\n"
4874 " --fastq_trunclen INT truncate to given length (discard if shorter)\n"
4875 " --fastq_trunclen_keep INT truncate to given length (keep if shorter)\n"
4876 " --fastq_truncqual INT truncate to given minimum base quality\n"
4877 " --maxsize INT discard if abundance of sequence is above\n"
4878 " --minsize INT discard if abundance of sequence is below\n"
4879 " Output\n"
4880 " --eeout include expected errors in output\n"
4881 " --fastaout FN FASTA filename for passed sequences\n"
4882 " --fastaout_discarded FN FASTA filename for discarded sequences\n"
4883 " --fastaout_discarded_rev FN FASTA filename for discarded reverse sequences\n"
4884 " --fastaout_rev FN FASTA filename for passed reverse sequences\n"
4885 " --fastqout FN FASTQ filename for passed sequences\n"
4886 " --fastqout_discarded FN FASTQ filename for discarded sequences\n"
4887 " --fastqout_discarded_rev FN FASTQ filename for discarded reverse sequences\n"
4888 " --fastqout_rev FN FASTQ filename for passed reverse sequences\n"
4889 " --relabel STRING relabel filtered sequences with given prefix\n"
4890 " --relabel_keep keep the old label after the new when relabelling\n"
4891 " --relabel_md5 relabel filtered sequences with md5 digest\n"
4892 " --relabel_self relabel with the sequence itself as label\n"
4893 " --relabel_sha1 relabel filtered sequences with sha1 digest\n"
4894 " --sizeout include abundance information when relabelling\n"
4895 " --xee remove expected errors (ee) info from output\n"
4896 " --xsize strip abundance information in output\n"
4897 "\n"
4898 "UDB files\n"
4899 " --makeudb_usearch FILENAME make UDB file from given FASTA file\n"
4900 " --udb2fasta FILENAME output FASTA file from given UDB file\n"
4901 " --udbinfo FILENAME show information about UDB file\n"
4902 " --udbstats FILENAME report statistics about indexed words in UDB file\n"
4903 " Parameters\n"
4904 " --dbmask none|dust|soft mask db with dust, soft or no method (dust)\n"
4905 " --hardmask mask by replacing with N instead of lower case\n"
4906 " --wordlength INT length of words for database index 3-15 (8)\n"
4907 " Output\n"
4908 " --output FILENAME UDB or FASTA output file\n"
4909 );
4910 }
4911 }
4912
cmd_allpairs_global()4913 void cmd_allpairs_global()
4914 {
4915 /* check options */
4916
4917 if ((!opt_alnout) && (!opt_userout) &&
4918 (!opt_uc) && (!opt_blast6out) &&
4919 (!opt_matched) && (!opt_notmatched) &&
4920 (!opt_samout) && (!opt_fastapairs)) {
4921 fatal("No output files specified");
4922
4923 }
4924
4925 if (! (opt_acceptall || ((opt_id >= 0.0) && (opt_id <= 1.0)))) {
4926 fatal("Specify either --acceptall or --id with an identity from 0.0 to 1.0");
4927
4928 }
4929
4930 allpairs_global(cmdline, progheader);
4931 }
4932
cmd_usearch_global()4933 void cmd_usearch_global()
4934 {
4935 /* check options */
4936
4937 if ((!opt_alnout) && (!opt_userout) &&
4938 (!opt_uc) && (!opt_blast6out) &&
4939 (!opt_matched) && (!opt_notmatched) &&
4940 (!opt_dbmatched) && (!opt_dbnotmatched) &&
4941 (!opt_samout) && (!opt_otutabout) &&
4942 (!opt_biomout) && (!opt_mothur_shared_out) &&
4943 (!opt_fastapairs)) {
4944 fatal("No output files specified");
4945
4946 }
4947
4948 if (!opt_db) {
4949 fatal("Database filename not specified with --db");
4950
4951 }
4952
4953 if ((opt_id < 0.0) || (opt_id > 1.0)) {
4954 fatal("Identity between 0.0 and 1.0 must be specified with --id");
4955
4956 }
4957
4958 usearch_global(cmdline, progheader);
4959 }
4960
cmd_search_exact()4961 void cmd_search_exact()
4962 {
4963 /* check options */
4964
4965 if ((!opt_alnout) && (!opt_userout) &&
4966 (!opt_uc) && (!opt_blast6out) &&
4967 (!opt_matched) && (!opt_notmatched) &&
4968 (!opt_dbmatched) && (!opt_dbnotmatched) &&
4969 (!opt_samout) && (!opt_otutabout) &&
4970 (!opt_biomout) && (!opt_mothur_shared_out) &&
4971 (!opt_fastapairs)) {
4972 fatal("No output files specified");
4973
4974 }
4975
4976 if (!opt_db) {
4977 fatal("Database filename not specified with --db");
4978
4979 }
4980
4981 search_exact(cmdline, progheader);
4982 }
4983
cmd_sortbysize()4984 void cmd_sortbysize()
4985 {
4986 if (!opt_output) {
4987 fatal("FASTA output file for sortbysize must be specified with --output");
4988
4989 }
4990
4991 sortbysize();
4992 }
4993
cmd_sortbylength()4994 void cmd_sortbylength()
4995 {
4996 if (!opt_output) {
4997 fatal("FASTA output file for sortbylength must be specified with --output");
4998
4999 }
5000
5001 sortbylength();
5002 }
5003
cmd_rereplicate()5004 void cmd_rereplicate()
5005 {
5006 if (!opt_output) {
5007 fatal("FASTA output file for rereplicate must be specified with --output");
5008
5009 }
5010
5011 rereplicate();
5012 }
5013
cmd_derep()5014 void cmd_derep()
5015 {
5016 if ((!opt_output) && (!opt_uc)) {
5017 fatal("Output file for dereplication must be specified with --output or --uc");
5018
5019 }
5020
5021 if (opt_derep_fulllength) {
5022 derep_fulllength();
5023 } else if (opt_derep_id) {
5024 derep_id();
5025 } else
5026 {
5027 if (opt_strand > 1) {
5028 fatal("Option '--strand both' not supported with --derep_prefix");
5029 } else {
5030 derep_prefix();
5031
5032 }
5033 }
5034 }
5035
cmd_shuffle()5036 void cmd_shuffle()
5037 {
5038 if (!opt_output) {
5039 fatal("Output file for shuffling must be specified with --output");
5040
5041 }
5042
5043 shuffle();
5044 }
5045
cmd_fastq_eestats()5046 void cmd_fastq_eestats()
5047 {
5048 if (!opt_output) {
5049 fatal("Output file for fastq_eestats must be specified with --output");
5050
5051 }
5052
5053 fastq_eestats();
5054 }
5055
cmd_fastq_eestats2()5056 void cmd_fastq_eestats2()
5057 {
5058 if (!opt_output) {
5059 fatal("Output file for fastq_eestats2 must be specified with --output");
5060
5061 }
5062
5063 fastq_eestats2();
5064 }
5065
cmd_subsample()5066 void cmd_subsample()
5067 {
5068 if ((!opt_fastaout) && (!opt_fastqout)) {
5069 fatal("Specify output files for subsampling with --fastaout and/or --fastqout");
5070
5071 }
5072
5073 if ((opt_sample_pct > 0) == (opt_sample_size > 0)) {
5074 fatal("Specify either --sample_pct or --sample_size");
5075
5076 }
5077
5078 subsample();
5079 }
5080
cmd_maskfasta()5081 void cmd_maskfasta()
5082 {
5083 if (!opt_output) {
5084 fatal("Output file for masking must be specified with --output");
5085
5086 }
5087
5088 maskfasta();
5089 }
5090
cmd_makeudb_usearch()5091 void cmd_makeudb_usearch()
5092 {
5093 if (!opt_output) {
5094 fatal("UDB output file must be specified with --output");
5095
5096 }
5097 udb_make();
5098 }
5099
cmd_udb2fasta()5100 void cmd_udb2fasta()
5101 {
5102 if (!opt_output) {
5103 fatal("FASTA output file must be specified with --output");
5104
5105 }
5106 udb_fasta();
5107 }
5108
cmd_fastx_mask()5109 void cmd_fastx_mask()
5110 {
5111 if ((!opt_fastaout) && (!opt_fastqout)) {
5112 fatal("Specify output files for masking with --fastaout and/or --fastqout");
5113
5114 }
5115
5116 fastx_mask();
5117 }
5118
cmd_none()5119 void cmd_none()
5120 {
5121 if (! opt_quiet) {
5122 fprintf(stderr,
5123 "For help, please enter: %s --help | less\n"
5124 "For further details, please consult the manual by entering: man vsearch\n"
5125 "\n"
5126 "Selected commands:\n"
5127 "\n"
5128 "vsearch --allpairs_global FILENAME --id 0.5 --alnout FILENAME\n"
5129 "vsearch --cluster_size FILENAME --id 0.97 --centroids FILENAME\n"
5130 "vsearch --cut FILENAME --cut_pattern G^AATT_C --fastaout FILENAME\n"
5131 "vsearch --derep_fulllength FILENAME --output FILENAME\n"
5132 "vsearch --fastq_chars FILENAME\n"
5133 "vsearch --fastq_convert FILENAME --fastqout FILENAME --fastq_ascii 64\n"
5134 "vsearch --fastq_eestats FILENAME --output FILENAME\n"
5135 "vsearch --fastq_eestats2 FILENAME --output FILENAME\n"
5136 "vsearch --fastq_mergepairs FILENAME --reverse FILENAME --fastqout FILENAME\n"
5137 "vsearch --fastq_stats FILENAME --log FILENAME\n"
5138 "vsearch --fastx_filter FILENAME --fastaout FILENAME --fastq_trunclen 100\n"
5139 "vsearch --fastx_getseq FILENAME --label LABEL --fastaout FILENAME\n"
5140 "vsearch --fastx_mask FILENAME --fastaout FILENAME\n"
5141 "vsearch --fastx_revcomp FILENAME --fastqout FILENAME\n"
5142 "vsearch --fastx_subsample FILENAME --fastaout FILENAME --sample_pct 1\n"
5143 "vsearch --makeudb_usearch FILENAME --output FILENAME\n"
5144 "vsearch --search_exact FILENAME --db FILENAME --alnout FILENAME\n"
5145 "vsearch --sff_convert FILENAME --output FILENAME --sff_clip\n"
5146 "vsearch --shuffle FILENAME --output FILENAME\n"
5147 "vsearch --sintax FILENAME --db FILENAME --tabbedout FILENAME\n"
5148 "vsearch --sortbylength FILENAME --output FILENAME\n"
5149 "vsearch --sortbysize FILENAME --output FILENAME\n"
5150 "vsearch --uchime_denovo FILENAME --nonchimeras FILENAME\n"
5151 "vsearch --uchime_ref FILENAME --db FILENAME --nonchimeras FILENAME\n"
5152 "vsearch --usearch_global FILENAME --db FILENAME --id 0.97 --alnout FILENAME\n"
5153 "\n"
5154 "Other commands: cluster_fast, cluster_smallmem, cluster_unoise, cut, derep_id,\n"
5155 " derep_prefix, fastq_filter, fastq_join, fastx_getseqs,\n"
5156 " fastx_getsubseqs, maskfasta, orient, rereplicate,\n"
5157 " uchime2_denovo, uchime3_denovo, udb2fasta, udbinfo, udbstats,\n"
5158 " version\n"
5159 "\n",
5160 progname);
5161
5162 }
5163 }
5164
cmd_fastx_revcomp()5165 void cmd_fastx_revcomp()
5166 {
5167 if ((!opt_fastaout) && (!opt_fastqout)) {
5168 fatal("No output files specified");
5169
5170 }
5171
5172 fastx_revcomp();
5173 }
5174
cmd_fastq_convert()5175 void cmd_fastq_convert()
5176 {
5177 if (! opt_fastqout) {
5178 fatal("No output file specified with --fastqout");
5179
5180 }
5181
5182 fastq_convert();
5183 }
5184
cmd_cluster()5185 void cmd_cluster()
5186 {
5187 if ((!opt_alnout) && (!opt_userout) &&
5188 (!opt_uc) && (!opt_blast6out) &&
5189 (!opt_matched) && (!opt_notmatched) &&
5190 (!opt_centroids) && (!opt_clusters) &&
5191 (!opt_consout) && (!opt_msaout) &&
5192 (!opt_samout) && (!opt_profile) &&
5193 (!opt_otutabout) && (!opt_biomout) &&
5194 (!opt_mothur_shared_out)) {
5195 fatal("No output files specified");
5196
5197 }
5198
5199 if (!opt_cluster_unoise) {
5200 if ((opt_id < 0.0) || (opt_id > 1.0)) {
5201 fatal("Identity between 0.0 and 1.0 must be specified with --id");
5202
5203 }
5204
5205 }
5206
5207 if (opt_cluster_fast) {
5208 cluster_fast(cmdline, progheader);
5209 } else if (opt_cluster_smallmem) {
5210 cluster_smallmem(cmdline, progheader);
5211 } else if (opt_cluster_size) {
5212 cluster_size(cmdline, progheader);
5213 } else if (opt_cluster_unoise) {
5214 cluster_unoise(cmdline, progheader);
5215
5216 }
5217 }
5218
cmd_uchime()5219 void cmd_uchime()
5220 {
5221 if ((!opt_chimeras) && (!opt_nonchimeras) &&
5222 (!opt_uchimeout) && (!opt_uchimealns)) {
5223 fatal("No output files specified");
5224
5225 }
5226
5227 if (opt_uchime_ref && ! opt_db) {
5228 fatal("Database filename not specified with --db");
5229
5230 }
5231
5232 if (opt_xn <= 1.0) {
5233 fatal("Argument to --xn must be > 1");
5234
5235 }
5236
5237 if (opt_dn <= 0.0) {
5238 fatal("Argument to --dn must be > 0");
5239
5240 }
5241
5242 if ((!opt_uchime2_denovo) && (!opt_uchime3_denovo))
5243 {
5244 if (opt_mindiffs <= 0) {
5245 fatal("Argument to --mindiffs must be > 0");
5246
5247 }
5248
5249 if (opt_mindiv <= 0.0) {
5250 fatal("Argument to --mindiv must be > 0");
5251
5252 }
5253
5254 if (opt_minh <= 0.0) {
5255 fatal("Argument to --minh must be > 0");
5256
5257 }
5258 }
5259
5260 #if 0
5261 if (opt_abskew <= 1.0)
5262 fatal("Argument to --abskew must be > 1");
5263 #endif
5264
5265 chimera();
5266 }
5267
cmd_fastq_mergepairs()5268 void cmd_fastq_mergepairs()
5269 {
5270 if (!opt_reverse) {
5271 fatal("No reverse reads file specified with --reverse");
5272
5273 }
5274 if ((!opt_fastqout) &&
5275 (!opt_fastaout) &&
5276 (!opt_fastqout_notmerged_fwd) &&
5277 (!opt_fastqout_notmerged_rev) &&
5278 (!opt_fastaout_notmerged_fwd) &&
5279 (!opt_fastaout_notmerged_rev) &&
5280 (!opt_eetabbedout)) {
5281 fatal("No output files specified");
5282
5283 }
5284 fastq_mergepairs();
5285 }
5286
5287
fillheader()5288 void fillheader() {
5289 constexpr double one_gigabyte {1024.0 * 1024.0 * 1024.0};
5290 snprintf(progheader, 80,
5291 "%s v%s_%s, %.1fGB RAM, %ld cores",
5292 PROG_NAME, PROG_VERSION, PROG_ARCH,
5293 arch_get_memtotal() / one_gigabyte,
5294 arch_get_cores());
5295 }
5296
5297
getentirecommandline(int argc,char ** argv)5298 void getentirecommandline(int argc, char** argv)
5299 {
5300 int len = 0;
5301 for (int i=0; i<argc; i++) {
5302 len += strlen(argv[i]);
5303
5304 }
5305
5306 cmdline = (char*) xmalloc(len+argc);
5307 cmdline[0] = 0;
5308
5309 for (int i=0; i<argc; i++)
5310 {
5311 if (i>0) {
5312 strcat(cmdline, " ");
5313
5314 }
5315 strcat(cmdline, argv[i]);
5316 }
5317 }
5318
show_header()5319 void show_header()
5320 {
5321 if (! opt_quiet)
5322 {
5323 fprintf(stderr, "%s\n", progheader);
5324 fprintf(stderr, "https://github.com/torognes/vsearch\n");
5325 fprintf(stderr, "\n");
5326 }
5327 }
5328
main(int argc,char ** argv)5329 int main(int argc, char** argv)
5330 {
5331 fillheader();
5332
5333 getentirecommandline(argc, argv);
5334
5335 cpu_features_detect();
5336
5337 args_init(argc, argv);
5338
5339 if (opt_log)
5340 {
5341 fp_log = fopen_output(opt_log);
5342 if (!fp_log) {
5343 fatal("Unable to open log file for writing");
5344
5345 }
5346 fprintf(fp_log, "%s\n", progheader);
5347 fprintf(fp_log, "%s\n", cmdline);
5348
5349 char time_string[26];
5350 time_start = time(nullptr);
5351 struct tm * tm_start = localtime(& time_start);
5352 strftime(time_string, 26, "%c", tm_start);
5353 fprintf(fp_log, "Started %s\n", time_string);
5354 }
5355
5356 random_init();
5357
5358 show_header();
5359
5360 dynlibs_open();
5361
5362 #ifdef __x86_64__
5363 if (!sse2_present) {
5364 fatal("Sorry, this program requires a cpu with SSE2.");
5365
5366 }
5367 #endif
5368
5369 if (opt_help) {
5370 cmd_help();
5371 } else if (opt_allpairs_global) {
5372 cmd_allpairs_global();
5373 } else if (opt_usearch_global) {
5374 cmd_usearch_global();
5375 } else if (opt_sortbysize) {
5376 cmd_sortbysize();
5377 } else if (opt_sortbylength) {
5378 cmd_sortbylength();
5379 } else if (opt_derep_fulllength || opt_derep_id || opt_derep_prefix) {
5380 cmd_derep();
5381 } else if (opt_shuffle) {
5382 cmd_shuffle();
5383 } else if (opt_fastx_subsample) {
5384 cmd_subsample();
5385 } else if (opt_maskfasta) {
5386 cmd_maskfasta();
5387 } else if (opt_cluster_smallmem || opt_cluster_fast || opt_cluster_size || opt_cluster_unoise) {
5388 cmd_cluster();
5389 } else if (opt_uchime_denovo || opt_uchime_ref || opt_uchime2_denovo || opt_uchime3_denovo) {
5390 cmd_uchime();
5391 } else if (opt_fastq_chars) {
5392 fastq_chars();
5393 } else if (opt_fastq_stats) {
5394 fastq_stats();
5395 } else if (opt_fastq_filter) {
5396 fastq_filter();
5397 } else if (opt_fastx_filter) {
5398 fastx_filter();
5399 } else if (opt_fastx_revcomp) {
5400 cmd_fastx_revcomp();
5401 } else if (opt_search_exact) {
5402 cmd_search_exact();
5403 } else if (opt_fastx_mask) {
5404 cmd_fastx_mask();
5405 } else if (opt_fastq_convert) {
5406 cmd_fastq_convert();
5407 } else if (opt_fastq_mergepairs) {
5408 cmd_fastq_mergepairs();
5409 } else if (opt_fastq_eestats) {
5410 cmd_fastq_eestats();
5411 } else if (opt_fastq_eestats2) {
5412 cmd_fastq_eestats2();
5413 } else if (opt_fastq_join) {
5414 fastq_join();
5415 } else if (opt_rereplicate) {
5416 cmd_rereplicate();
5417 } else if (opt_version) {
5418 cmd_version();
5419 } else if (opt_makeudb_usearch) {
5420 cmd_makeudb_usearch();
5421 } else if (opt_udb2fasta) {
5422 cmd_udb2fasta();
5423 } else if (opt_udbinfo) {
5424 udb_info();
5425 } else if (opt_udbstats) {
5426 udb_stats();
5427 } else if (opt_sintax) {
5428 sintax();
5429 } else if (opt_sff_convert) {
5430 sff_convert();
5431 } else if (opt_fastx_getseq) {
5432 fastx_getseq();
5433 } else if (opt_fastx_getseqs) {
5434 fastx_getseqs();
5435 } else if (opt_fastx_getsubseq) {
5436 fastx_getsubseq();
5437 } else if (opt_cut) {
5438 cut();
5439 } else if (opt_orient) {
5440 orient();
5441 } else {
5442 cmd_none();
5443
5444 }
5445
5446 if (opt_log)
5447 {
5448 time_finish = time(nullptr);
5449 struct tm * tm_finish = localtime(& time_finish);
5450 char time_string[26];
5451 strftime(time_string, 26, "%c", tm_finish);
5452 fprintf(fp_log, "\n");
5453 fprintf(fp_log, "Finished %s", time_string);
5454
5455 double time_diff = difftime(time_finish, time_start);
5456 fprintf(fp_log, "\n");
5457 fprintf(fp_log, "Elapsed time %02.0lf:%02.0lf\n",
5458 floor(time_diff / 60.0),
5459 floor(time_diff - 60.0 * floor(time_diff / 60.0)));
5460 double maxmem = arch_get_memused() / 1048576.0;
5461 if (maxmem < 1024.0) {
5462 fprintf(fp_log, "Max memory %.1lfMB\n", maxmem);
5463 } else {
5464 fprintf(fp_log, "Max memory %.1lfGB\n", maxmem/1024.0);
5465
5466 }
5467 fclose(fp_log);
5468 }
5469
5470 if (opt_ee_cutoffs_values) {
5471 xfree(opt_ee_cutoffs_values);
5472
5473 }
5474 opt_ee_cutoffs_values = nullptr;
5475
5476 xfree(cmdline);
5477 dynlibs_close();
5478 }
5479