1#!/usr/local/bin/perl
2# $Id: legacy_blast.pl 608983 2020-05-26 15:41:33Z camacho $
3# ===========================================================================
4#
5#                            PUBLIC DOMAIN NOTICE
6#               National Center for Biotechnology Information
7#
8#  This software/database is a "United States Government Work" under the
9#  terms of the United States Copyright Act.  It was written as part of
10#  the author's official duties as a United States Government employee and
11#  thus cannot be copyrighted.  This software/database is freely available
12#  to the public for use. The National Library of Medicine and the U.S.
13#  Government have not placed any restriction on its use or reproduction.
14#
15#  Although all reasonable efforts have been taken to ensure the accuracy
16#  and reliability of the software and data, the NLM and the U.S.
17#  Government do not and cannot warrant the performance or results that
18#  may be obtained by using this software or data. The NLM and the U.S.
19#  Government disclaim all warranties, express or implied, including
20#  warranties of performance, merchantability or fitness for any particular
21#  purpose.
22#
23#  Please cite the author in any work or product based on this material.
24#
25# ===========================================================================
26#
27# Author:  Christiam Camacho
28#
29# File Description:
30#   Script to convert NCBI C toolkit command line program and arguments into
31#   NCBI C++ toolkit command line program and arguments for the BLAST suite of
32#   programs
33#
34# ===========================================================================
35
36use strict;
37use warnings;
38use Getopt::Long qw(:config no_ignore_case bundling no_auto_abbrev);
39use Pod::Usage;
40
41use constant DEBUG => 0;
42# Default PATH where binaries will be found
43use constant DEFAULT_PATH => "/usr/bin";
44
# With no arguments there is nothing to convert: print the manual and exit
pod2usage({-exitval => 1, -verbose => 2}) if (@ARGV == 0);

# The first argument names the legacy application (or asks for the version or
# the help). The handlers parse the remaining arguments from @ARGV and may
# overwrite this variable through their "<>" callback.
my $application = shift;
my $print_only = "0"; # Determines whether script prints or runs the command
# This array will contain file names to delete that are created with bl2seq's
# -A option
my @files2delete;

# Legacy application name => subroutine that builds the equivalent command.
# The handlers are invoked through code references because they are defined
# further down in the file, i.e.: their prototypes are not known yet here.
my %handler_for = (
    "blastall"  => \&handle_blastall,
    "megablast" => \&handle_megablast,
    "blastpgp"  => \&handle_blastpgp,
    "bl2seq"    => \&handle_bl2seq,
    "rpsblast"  => \&handle_rpsblast,
    "fastacmd"  => \&handle_fastacmd,
    "formatdb"  => \&handle_formatdb,
    "seedtop"   => \&handle_seedtop,
);

my $cmd;
my $have_command = 1;   # cleared when there is no command to print or run
if (exists $handler_for{$application}) {
    $cmd = $handler_for{$application}->(\$print_only);
} elsif ($application =~ /version/) {
    my $revision = '$Revision: 608983 $';
    $revision =~ s/\$Revision: | \$//g;
    print "$0 version $revision\n";
    $have_command = 0;
} elsif ($application =~ /help/) {
    pod2usage({-exitval => 1, -verbose => 2});
} else {
    die "Application: '$application' is not supported\n";
}

if ($have_command) {
    if ($print_only) {
        print "$cmd\n";
    } else {
        print STDERR "$cmd\n" if (DEBUG);
        my $rv = system($cmd);
        unless ($rv == 0) {
            # Remove the temporary files also when the program fails, as die
            # would otherwise skip the clean up below. $! and $? are
            # localized so that the exit status chosen by die is unaffected.
            {
                local ($!, $?);
                unlink @files2delete;
            }
            die "Program failed, try executing the command manually.\n";
        }
    }
}

# Clean up the temporary files (if any)
unlink @files2delete;
93
# Builds the -db argument. The database name is wrapped in quotation marks
# only when it contains white space.
sub create_db_argument($)
{
    my ($db_name) = @_;
    return "-db \"$db_name\" " if ($db_name =~ /\s/);
    return "-db $db_name ";
}
102
# Converts floating point numbers to integers.
#
# Parameters:
#   the number to convert, possibly in scientific notation (e.g.: "1.5e+10")
# Returns: the number truncated toward zero and written out in plain digits
# (e.g.: "15000000000"), never in exponent notation.
sub convert_float_to_int($)
{
    my $float_arg = shift;
    # Perl's own numeric conversion understands fractional mantissas and
    # signed or unsigned exponents. int() truncates the value, adding 0 turns
    # a negative zero into a plain zero, and "%.0f" spells out all the digits
    # where the default stringification would switch to exponent notation.
    return sprintf("%.0f", int($float_arg) + 0);
}
120
# Returns the text to append right after a binary name: ".exe " when running
# on windows (native or cygwin), a single space everywhere else
sub add_exe_extension()
{
    if ($^O =~ /mswin|cygwin/i) {
        return ".exe ";
    }
    return " ";
}
126
# Converts a legacy sequence location ("start,stop", where the coordinates
# may be separated by spaces, commas or semicolons) into the "start-stop"
# form.
#
# Parameters:
#   $arg    - the legacy location string, may be undefined
#   $target - "query" selects -query_loc, "range" selects -range, any other
#             value selects -subject_loc
# Returns: the converted argument followed by a space, or the empty string if
# $arg is undefined.
# Dies if fewer than two coordinates are found in $arg.
sub convert_sequence_locations($$)
{
    my $arg = shift;
    my $target = shift;
    # Return an empty string (rather than undef) so that the result can always
    # be appended to the command line
    return "" unless (defined $arg);

    my $option = ($target eq "query") ? "-query_loc"
               : ($target eq "range") ? "-range"
               :                        "-subject_loc";

    # Split on runs of delimiters and drop empty fields, so that inputs such
    # as "1, 100" do not produce an empty stop coordinate
    my @fields = grep { length } split(/[ ;,]+/, $arg);
    if (@fields < 2) {
        die "Invalid sequence location '$arg', expected 'start,stop'\n";
    }
    return "$option $fields[0]-$fields[1] ";
}
145
# Converts a legacy filter string (value of the -F option) into the
# corresponding filtering arguments.
#
# Parameters:
#   $filter_string - the legacy filter string, e.g.: "F", "T", "m S",
#                    "S 10 1.0 1.5", "D 20 64 1", "R -d some_db"
#   $program       - the BLAST program name; blastp, tblastn, blastx and
#                    tblastx get -seg, any other program gets -dust
# Returns: the converted arguments, each one followed by a space (possibly the
# empty string).
sub convert_filter_string($$)
{
    my $filter_string = shift;
    my $program = shift;

    # The same program test is used to turn the filtering off and on, so that
    # both cases agree on the option (-seg or -dust) that applies to a given
    # program; this includes tblastx in both cases
    my $uses_seg = ($program eq "blastp" or $program eq "tblastn" or
                    $program eq "blastx" or $program eq "tblastx");

    # An F anywhere in the filter string turns the filtering off
    if ($filter_string =~ /F/) {
        return $uses_seg ? "-seg no " : "-dust no ";
    }

    my $retval = "";
    # Filters with explicit parameters
    if ($filter_string =~ /S (\d+) (\S+) (\S+)/) {
        $retval .= "-seg '$1 $2 $3' ";
    }
    if ($filter_string =~ /D (\d+) (\d+) (\d+)/) {
        $retval .= "-dust '$1 $2 $3' ";
    }
    # Repeat filtering: explicit database, or the default one
    if ($filter_string =~ /R -d (\S+)/) {
        $retval .= "-filtering_db $1 ";
    } elsif ($filter_string =~ /R\s*;/) {
        $retval .= "-filtering_db repeat/repeat_9606 ";
    }

    # Filtering requested without explicit parameters: use the defaults
    if ($filter_string =~ /L|T|S|D/ and not ($retval =~ /seg|dust/)) {
        $retval .= $uses_seg ? "-seg yes " : "-dust yes ";
    }

    if ($filter_string =~ /m/) {
        $retval .= "-soft_masking true ";
    }
    return $retval;
}
190
# Converts the legacy numeric strand argument into the -strand argument:
# 1 means plus, 2 means minus, any other value means both
sub convert_strand($)
{
    my ($legacy_strand) = @_;
    my $strand_name = ($legacy_strand == 1) ? "plus"
                    : ($legacy_strand == 2) ? "minus"
                    :                         "both";
    return "-strand $strand_name ";
}
204
# Handle the conversion from blastall arguments to the corresponding C++
# binaries
#
# Parameters:
#   reference to the scalar that GetOptions sets when the print_only option
#   is given
# Returns: the command line (binary followed by its arguments) as a string.
# Dies if -p is not provided. The legacy options are read from @ARGV.
sub handle_blastall($)
{
    my $print_only = shift;
    my $path = DEFAULT_PATH;
    my ($opt_A, $opt_B, $opt_C, $opt_D, $opt_E, $opt_F, $opt_G, $opt_I, $opt_J,
        $opt_K, $opt_L, $opt_M, $opt_O, $opt_P, $opt_Q, $opt_R, $opt_S, $opt_T,
        $opt_U, $opt_V, $opt_W, $opt_X, $opt_Y, $opt_Z, $opt_a, $opt_b, $opt_d,
        $opt_e, $opt_f, $opt_g, $opt_i, $opt_l, $opt_m, $opt_n, $opt_o, $opt_p,
        $opt_q, $opt_r, $opt_s, $opt_t, $opt_v, $opt_w, $opt_y, $opt_z);

    GetOptions("<>"             => sub { $application = shift; },
               "print_only!"    => $print_only,
               "path=s"         => \$path,
               "A=i"            => \$opt_A,
               "B=i"            => \$opt_B, # not handled, not applicable
               "C=s"            => \$opt_C,
               "D=i"            => \$opt_D,
               "E=i"            => \$opt_E,
               "F=s"            => \$opt_F,
               "G=i"            => \$opt_G,
               "I:s"            => \$opt_I,
               "J:s"            => \$opt_J,
               "K=i"            => \$opt_K,
               "L=s"            => \$opt_L,
               "M=s"            => \$opt_M,
               "O=s"            => \$opt_O,
               "P=i"            => \$opt_P,
               "Q=i"            => \$opt_Q,
               "R=s"            => \$opt_R,
               "S=i"            => \$opt_S,
               "T:s"            => \$opt_T,
               "U:s"            => \$opt_U,
               "V:s"            => \$opt_V, # not handled, not applicable
               "W=i"            => \$opt_W,
               "X=i"            => \$opt_X,
               "Y=f"            => \$opt_Y,
               "Z=i"            => \$opt_Z,
               "a=i"            => \$opt_a,
               "b=i"            => \$opt_b,
               "d=s"            => \$opt_d,
               "e=f"            => \$opt_e,
               "f=i"            => \$opt_f,
               "g:s"            => \$opt_g,
               "i=s"            => \$opt_i,
               "l=s"            => \$opt_l,
               "m=i"            => \$opt_m,
               "n:s"            => \$opt_n,
               "o=s"            => \$opt_o,
               "p=s"            => \$opt_p,
               "q=i"            => \$opt_q,
               "r=i"            => \$opt_r,
               "s:s"            => \$opt_s,
               "t=i"            => \$opt_t,
               "v=i"            => \$opt_v,
               "w=i"            => \$opt_w,
               "y=f"            => \$opt_y,
               "z=f"            => \$opt_z,
               );

    unless (defined $opt_p) {
        die "-p must be provided\n";
    }

    # Select the binary: -R (PSSM input) forces tblastn, -n T forces megablast
    my $retval = $path;
    if (defined $opt_R) {
        $retval .= "/tblastn";
        $retval .= add_exe_extension();
        $retval .= "-in_pssm $opt_R ";
    } elsif (defined $opt_n and $opt_n =~ /t/i) {
        $retval .= "/blastn";
        $retval .= add_exe_extension();
        $retval .= "-task megablast ";
    } else {
        $retval .= "/$opt_p";
        $retval .= add_exe_extension();
        $retval .= "-task blastn " if ($opt_p eq "blastn");
    }
    $retval .= create_db_argument($opt_d)   if (defined $opt_d);
    # The query is not passed along when the PSSM is the input to tblastn
    unless (($retval =~ /\/tblastn/) and defined $opt_R) {
        $retval .= "-query $opt_i "         if (defined $opt_i);
    }
    $retval .= "-gilist $opt_l "            if (defined $opt_l);
    $retval .= "-dbsize $opt_z "            if (defined $opt_z);
    $retval .= "-matrix $opt_M "            if (defined $opt_M);
    $retval .= "-evalue $opt_e "            if (defined $opt_e);
    $retval .= "-gapopen $opt_G "           if (defined $opt_G);
    $retval .= "-gapextend $opt_E "         if (defined $opt_E);
    $retval .= "-xdrop_ungap $opt_y "       if (defined $opt_y);
    $retval .= "-xdrop_gap $opt_X "         if (defined $opt_X);
    $retval .= "-xdrop_gap_final $opt_Z "   if (defined $opt_Z);
    $retval .= "-num_threads $opt_a "       if (defined $opt_a);
    # An explicit window size (-A) takes precedence over -P
    if (defined $opt_A) {
        if (defined $opt_P and $opt_P ne "0") {
            print STDERR "Warning: ignoring -P because window size is set\n";
        }
        $retval .= "-window_size $opt_A ";
    }
    if (defined $opt_P and $opt_P eq "1" and (not defined $opt_A)) {
        $retval .= "-window_size 0 ";
    }
    $retval .= "-word_size $opt_W "         if (defined $opt_W);
    if (defined $opt_Y) {
        $retval .= "-searchsp " . convert_float_to_int($opt_Y) . " ";
    }
    if (defined $opt_f) {
        unless ($opt_p eq "blastn") {
            $retval .= "-min_word_score $opt_f ";
        } else {
            print STDERR "Warning: -f is not supported for blastn\n";
        }
    }
    if (defined $opt_I and (length($opt_I) == 0 or $opt_I =~ /t/i)) {
        $retval .= "-show_gis ";
    }
    $retval .= "-num_descriptions $opt_v "  if (defined $opt_v);
    $retval .= "-num_alignments $opt_b "    if (defined $opt_b);
    $retval .= "-query_gencode $opt_Q "     if (defined $opt_Q);
    $retval .= "-db_gencode $opt_D "        if (defined $opt_D);
    $retval .= "-penalty $opt_q "           if (defined $opt_q);
    $retval .= "-reward $opt_r "            if (defined $opt_r);
    $retval .= "-culling_limit $opt_K "     if (defined $opt_K);
    $retval .= "-max_intron_length $opt_t " if (defined $opt_t);
    $retval .= "-frame_shift_penalty $opt_w " if (defined $opt_w);
    $retval .= "-comp_based_stats $opt_C "  if (defined $opt_C);
    $retval .= "-out $opt_o "               if (defined $opt_o);
    if (defined $opt_m) {
        if ($opt_m == 5 or $opt_m == 6) {
            print STDERR "Warning: -m5 or -m6 formatting options ";
            print STDERR "are not supported!\n";
        }
        # Values from 7 on are shifted down by two in the -outfmt numbering
        $opt_m -= 2 if ($opt_m >= 7);
        $retval .= "-outfmt $opt_m ";
    }
    # -O overrides both the output file and the output format set so far
    if (defined $opt_O) {
        unless ($retval =~ s/-out \S+ /-out $opt_O /) {
            $retval .= "-out $opt_O ";
        }
        unless ($retval =~ s/-outfmt \d+/-outfmt 8/) {
            $retval .= "-outfmt 8 ";
        } else {
            print STDERR "Warning: overriding output format\n";
        }
    }
    if (defined $opt_T and (length($opt_T) == 0 or $opt_T =~ /t/i)) {
        $retval .= "-html ";
    }

    $retval .= convert_sequence_locations($opt_L, "query") if ($opt_L);
    if (defined $opt_U and (length($opt_U) == 0 or $opt_U =~ /t/i)) {
        $retval .= "-lcase_masking ";
    }
    if (defined $opt_g and $opt_g =~ /f/i) {
        $retval .= "-ungapped ";
    }
    if (defined $opt_J and (length($opt_J) == 0 or $opt_J =~ /t/i)) {
        $retval .= "-parse_deflines ";
    }
    # The strand is converted for every program except blastp and tblastn.
    # Both comparisons must hold: the previous form of this test, not (A ne x
    # or A ne y), could never be true, which silently dropped -S.
    if (defined $opt_S and $opt_p ne "blastp" and $opt_p ne "tblastn") {
        $retval .= convert_strand($opt_S);
    }
    if (defined $opt_s and (length($opt_s) == 0 or $opt_s =~ /t/i)) {
        $retval .= "-use_sw_tback ";
    }

    # When -F is absent, blastp gets the filter string "T" converted
    if (defined $opt_F) {
        $retval .= convert_filter_string($opt_F, $opt_p);
    } elsif ($opt_p eq "blastp") {
        $retval .= convert_filter_string("T", $opt_p);
    }

    return $retval;
}
381
# Handle the conversion from seedtop arguments to the corresponding psiblast
# command line
#
# Parameters:
#   reference to the scalar that GetOptions sets when the print_only option
#   is given
# Returns: the command line (binary followed by its arguments) as a string.
# Dies unless -p patseedp is given. The legacy options are read from @ARGV.
sub handle_seedtop($)
{
    my $print_only = shift;
    my $path = DEFAULT_PATH;

    # Values of the single letter options, keyed by the option letter
    my %opt;
    my @option_specs = qw(C=i D=i E=i F:s G=i I:s J:s K=i M=s O=s S=i X=i
                          d=s e=f f:s i=s k=s o=s p=s q=i r=i);
    GetOptions("<>"          => sub { $application = shift; },
               "print_only!" => $print_only,
               "path=s"      => \$path,
               map { ($_, \$opt{ substr($_, 0, 1) }) } @option_specs);

    my $retval = $path . "/psiblast" . add_exe_extension();

    # Appends "<flag> <value> " to the command line if the value is defined
    my $add_argument = sub {
        my ($flag, $value) = @_;
        $retval .= "$flag $value " if (defined $value);
    };
    # True for options given without a value or with a value containing a T
    my $is_enabled = sub {
        my ($value) = @_;
        return (defined $value and (length($value) == 0 or $value =~ /t/i));
    };

    $add_argument->("-query", $opt{i});
    $add_argument->("-phi_pattern", $opt{k});
    $retval .= create_db_argument($opt{d}) if (defined $opt{d});
    $add_argument->("-gapopen", $opt{G});
    $add_argument->("-gapextend", $opt{E});
    $add_argument->("-out", $opt{o});

    # -O overrides both the output file and the output format set so far
    if (defined $opt{O}) {
        my $asn_file = $opt{O};
        unless ($retval =~ s/-out \S+ /-out $asn_file /) {
            $retval .= "-out $asn_file ";
        }
        if ($retval =~ s/-outfmt \d+/-outfmt 8/) {
            print STDERR "Warning: overriding output format\n";
        } else {
            $retval .= "-outfmt 8 ";
        }
    }

    $retval .= "-show_gis "       if ($is_enabled->($opt{I}));
    $retval .= "-parse_deflines " if ($is_enabled->($opt{J}));
    $add_argument->("-xdrop_gap", $opt{X});
    $add_argument->("-evalue", $opt{e});
    $add_argument->("-matrix", $opt{M});

    my $query_is_protein = "1";
    die "Program must be specified\n" unless (defined $opt{p});
    die "Only patseedp program is supported\n" unless ($opt{p} eq "patseedp");
    # Change query_is_protein if other programs are supported

    if (defined $opt{F}) {
        my $program = ($query_is_protein eq "1") ? "blastp" : "blastn";
        $retval .= convert_filter_string($opt{F}, $program);
    }

    # Unsupported options
    foreach my $letter (qw(D S C q r f K)) {
        next unless (defined $opt{$letter});
        print STDERR "Warning: -$letter option is not supported!\n";
    }
    return $retval;
}
483
# Handle the conversion from megablast arguments to the corresponding blastn
# command line
#
# Parameters:
#   reference to the scalar that GetOptions sets when the print_only option
#   is given
# Returns: the command line (binary followed by its arguments) as a string.
# The legacy options are read from @ARGV.
sub handle_megablast($)
{
    my $print_only = shift;
    my $path = DEFAULT_PATH;

    # Values of the single letter options, keyed by the option letter.
    # -M and -V are accepted but ignored; -P has no equivalent in the new
    # engine.
    my %opt;
    my @option_specs = qw(A=i D=i E=i F=s G=i H=i I:s J:s L=s M=i N=i O=s
                          P=i Q=s R:s S=i T:s U:s V:s W=i X=i Y=f Z=i a=i
                          b=i d=s e=f f:s g:s i=s l=s m=i n:s o=s p=f q=i
                          r=i s=i t=i v=i y=i z=f);
    GetOptions("<>"          => sub { $application = shift; },
               "print_only!" => $print_only,
               "path=s"      => \$path,
               map { ($_, \$opt{ substr($_, 0, 1) }) } @option_specs);

    my $retval = $path . "/blastn" . add_exe_extension();

    # Appends "<flag> <value> " to the command line if the value is defined
    my $add_argument = sub {
        my ($flag, $value) = @_;
        $retval .= "$flag $value " if (defined $value);
    };
    # True for options given without a value or with a value containing a T
    my $is_enabled = sub {
        my ($value) = @_;
        return (defined $value and (length($value) == 0 or $value =~ /t/i));
    };
    # Replaces a numeric -outfmt already on the command line (with a warning)
    # or appends the new output format if there is none
    my $set_output_format = sub {
        my ($new_format) = @_;
        if ($retval =~ s/-outfmt \d+/$new_format/) {
            print STDERR "Warning: overriding output format\n";
        } else {
            $retval .= "$new_format ";
        }
    };

    $add_argument->("-query", $opt{i});
    $retval .= create_db_argument($opt{d}) if (defined $opt{d});
    $add_argument->("-evalue", $opt{e});
    $add_argument->("-xdrop_gap", $opt{X});
    $add_argument->("-gilist", $opt{l});
    $add_argument->("-penalty", $opt{q});
    $add_argument->("-reward", $opt{r});
    $add_argument->("-gapopen", $opt{G});
    $add_argument->("-gapextend", $opt{E});
    $add_argument->("-out", $opt{o});

    if (defined $opt{m}) {
        if ($opt{m} == 5 or $opt{m} == 6) {
            print STDERR "Warning: -m5 or -m6 formatting options " .
                "are not supported!\n";
        }
        # Values from 7 on are shifted down by two in the -outfmt numbering
        $opt{m} -= 2 if ($opt{m} >= 7);
        $retval .= "-outfmt $opt{m} ";
    }

    # -O overrides both the output file and the output format set so far
    if (defined $opt{O}) {
        my $asn_file = $opt{O};
        unless ($retval =~ s/-out \S+ /-out $asn_file /) {
            $retval .= "-out $asn_file ";
        }
        $set_output_format->("-outfmt 8");
    }

    $retval .= "-html " if ($is_enabled->($opt{T}));
    $add_argument->("-num_descriptions", $opt{v});
    $add_argument->("-num_alignments", $opt{b});
    $add_argument->("-num_threads", $opt{a});
    $add_argument->("-word_size", $opt{W});
    $add_argument->("-dbsize", $opt{z});
    if (defined $opt{Y}) {
        $retval .= "-searchsp " . convert_float_to_int($opt{Y}) . " ";
    }
    $add_argument->("-xdrop_ungap", $opt{y});
    $add_argument->("-xdrop_gap_final", $opt{Z});

    if (defined $opt{t}) {
        $retval .= "-template_length $opt{t} ";
        # blastn requires the template type, so fall back to megablast's
        # default value when it was not provided
        $opt{N} = 0 unless (defined $opt{N});
    }
    $add_argument->("-window_size", $opt{A});
    if (defined $opt{N}) {
        # The position in this list is the legacy numeric template type
        my @template_types = ("coding", "optimal", "coding_and_optimal");
        foreach my $type_id (0 .. $#template_types) {
            next unless ($opt{N} == $type_id);
            $retval .= "-template_type $template_types[$type_id] ";
        }
    }
    if (defined $opt{F}) {
        $retval .= convert_filter_string($opt{F}, "blastn");
    }
    $retval .= "-parse_deflines " if ($is_enabled->($opt{J}));

    $add_argument->("-perc_identity", $opt{p});
    $add_argument->("-min_raw_gapped_score", $opt{s});
    $retval .= convert_strand($opt{S}) if (defined $opt{S});
    $retval .= convert_sequence_locations($opt{L}, "query") if ($opt{L});
    $retval .= "-lcase_masking " if ($is_enabled->($opt{U}));
    $retval .= "-no_greedy "     if ($is_enabled->($opt{n}));

    # Unsupported options
    # The -M option can be safely ignored, so no warning is printed for it

    my $tab_with_acc =
        "-outfmt \"7 qacc sseqid pident length mismatch gapopen qstart qend " .
        "sstart send evalue bitscore\" ";

    # Here are some combinations of options and their equivalent conversion to
    # the -outfmt option:
    # NOTE: only in the last case we use sgi as the user explicitly requests
    # the GIs to be shown (via -I), thus we assume the database/subjects will
    # have GIs. We don't do the same for accessions, because if these are not
    # available, an ordinal ID gets printed.
    # -J -D3 -R -fF = -outfmt "7 qacc sseqid pident length mismatch gapopen
    # qstart qend sstart send evalue bitscore"
    # -J -D3 -R -fT = -outfmt "7 qseqid sseqid pident length mismatch gapopen
    # qstart qend sstart send evalue bitscore"
    # -J -D3 -R -fT -I = -outfmt "7 qgi sgi pident length mismatch gapopen
    # qstart qend sstart send evalue bitscore"

    if (defined $opt{D}) {
        # Supported legacy output types and their output formats
        my @output_types = (
            [3, $tab_with_acc], # tabular output
            [2, "-outfmt 0"],   # traditional BLAST output
            [4, "-outfmt 8"],   # text ASN.1
            [5, "-outfmt 9"],   # binary ASN.1
        );
        my ($selected) = grep { $opt{D} == $_->[0] } @output_types;
        if (defined $selected) {
            $set_output_format->($selected->[1]);
        } else {
            print STDERR "Warning: -D option with value $opt{D} is not " .
                "supported!\n";
        }
    }

    if ($is_enabled->($opt{I})) {
        $retval .= "-show_gis ";
        $retval =~ s/qacc/qgi/;
        $retval =~ s/sseqid/sgi/;
    }
    # -fF is the default, if -f or -fT is specified, we assume that's what's
    # desired and we apply a modification to the previously set output format
    # (we can safely assume this b/c -f only works with -D3)
    if ($is_enabled->($opt{f})) {
        $retval =~ s/qacc/qseqid/;
    }
    if (defined $opt{R} and not ($retval =~ /-outfmt.*7/)) {
        print STDERR "Warning: -R option is deprecated, please rely on the ".
            "application's exit code to determine its success or failure.\n" .
            "0 means success, non-zero means failure\n";
    }

    # Deprecated options
    if (defined $opt{g} and $opt{g} =~ /f/i) {
        print STDERR "Warning: -g option is not supported!\n";
    }
    print STDERR "Warning -H option is not supported!\n" if (defined $opt{H});
    print STDERR "Warning: -Q option is deprecated\n"    if (defined $opt{Q});
    print STDERR "Warning: -P option is deprecated\n"    if (defined $opt{P});

    return $retval;
}
703
704sub handle_blastpgp($)
705{
706    my $print_only = shift;
707    my $path = DEFAULT_PATH;
708    my ($opt_A, $opt_B, $opt_C, $opt_E, $opt_F, $opt_G, $opt_H, $opt_I,
709        $opt_J, $opt_K, $opt_L, $opt_M, $opt_N, $opt_O, $opt_P, $opt_Q,
710        $opt_R, $opt_S, $opt_T, $opt_U, $opt_W, $opt_X, $opt_Y, $opt_Z,
711        $opt_a, $opt_b, $opt_c, $opt_d, $opt_e, $opt_f, $opt_h, $opt_i,
712        $opt_j, $opt_k, $opt_l, $opt_m, $opt_o, $opt_p, $opt_q, $opt_s,
713        $opt_t, $opt_u, $opt_v, $opt_y, $opt_z);
714
715    GetOptions("<>"             => sub { $application = shift; },
716               "print_only!"    => $print_only,
717               "path=s"         => \$path,
718               "A=i"            => \$opt_A,
719               "B=s"            => \$opt_B,
720               "C=s"            => \$opt_C,
721               "E=i"            => \$opt_E,
722               "F=s"            => \$opt_F,
723               "G=i"            => \$opt_G,
724               "H=i"            => \$opt_H,
725               "I:s"            => \$opt_I,
726               "J:s"            => \$opt_J,
727               "K=i"            => \$opt_K,
728               "L=i"            => \$opt_L,
729               "M=s"            => \$opt_M,
730               "N=f"            => \$opt_N,
731               "O=s"            => \$opt_O,
732               "P=i"            => \$opt_P,
733               "Q=s"            => \$opt_Q,
734               "R=s"            => \$opt_R,
735               "S=i"            => \$opt_S,
736               "T:s"            => \$opt_T,
737               "U:s"            => \$opt_U,
738               "W=i"            => \$opt_W,
739               "X=i"            => \$opt_X,
740               "Y=f"            => \$opt_Y,
741               "Z=i"            => \$opt_Z,
742               "a=i"            => \$opt_a,
743               "b=i"            => \$opt_b,
744               "c=i"            => \$opt_c,
745               "d=s"            => \$opt_d,
746               "e=f"            => \$opt_e,
747               "f=i"            => \$opt_f,
748               "h=f"            => \$opt_h,
749               "i=s"            => \$opt_i,
750               "j=i"            => \$opt_j,
751               "k=s"            => \$opt_k,
752               "l=s"            => \$opt_l,
753               "m=i"            => \$opt_m,
754               "o=s"            => \$opt_o,
755               "p=s"            => \$opt_p,
756               "q=i"            => \$opt_q,
757               "s:s"            => \$opt_s,
758               "t=s"            => \$opt_t,
759               "u=i"            => \$opt_u,
760               "v=i"            => \$opt_v,
761               "y=f"            => \$opt_y,
762               "z=f"            => \$opt_z
763               );
764    my $retval = $path . "/psiblast";
765    $retval .= &add_exe_extension();
766
767    my $query_is_protein = "1";
768
769    if (defined $opt_p and not ($opt_p ne "blastpgp" or
770                                $opt_p ne "patseedp")) {
771        die "Program '$opt_p' not implemented\n";
772    }
773
774    $retval .= &create_db_argument($opt_d)  if (defined $opt_d);
775    $retval .= "-query $opt_i "             if (defined $opt_i);
776    $retval .= "-gilist $opt_l "            if (defined $opt_l);
777    $retval .= "-gap_trigger $opt_N "       if (defined $opt_N);
778    $retval .= "-matrix $opt_M "            if (defined $opt_M);
779    $retval .= "-num_iterations $opt_j "    if (defined $opt_j);
780    $retval .= "-min_word_score $opt_f "    if (defined $opt_f);
781    $retval .= "-evalue $opt_e "            if (defined $opt_e);
782    $retval .= "-gapopen $opt_G "           if (defined $opt_G);
783    $retval .= "-gapextend $opt_E "         if (defined $opt_E);
784    $retval .= "-num_threads $opt_a "       if (defined $opt_a);
785    $retval .= "-dbsize $opt_z "            if (defined $opt_z);
786    if (defined $opt_Y) {
787        $retval .= "-searchsp " . &convert_float_to_int($opt_Y) . " ";
788    }
789    $retval .= "-pseudocount $opt_c "       if (defined $opt_c);
790    $retval .= "-inclusion_ethresh $opt_h " if (defined $opt_h);
791    if (defined $opt_A) {
792        if (defined $opt_P and $opt_P ne "0") {
793            print STDERR "Warning: ignoring -P because window size is set\n";
794        }
795        $retval .= "-window_size $opt_A "
796    }
797    if (defined $opt_P and $opt_P eq "1" and (not defined $opt_A)) {
798        $retval .= "-window_size 0 ";
799    }
800    $retval .= "-word_size $opt_W "         if (defined $opt_W);
801    $retval .= "-xdrop_ungap $opt_y "       if (defined $opt_y);
802    $retval .= "-xdrop_gap $opt_X "         if (defined $opt_X);
803    $retval .= "-xdrop_gap_final $opt_Z "   if (defined $opt_Z);
804    $retval .= "-num_descriptions $opt_v "  if (defined $opt_v);
805    $retval .= "-num_alignments $opt_b "    if (defined $opt_b);
806    $retval .= "-culling_limit $opt_K "     if (defined $opt_K);
807    $retval .= "-comp_based_stats $opt_t "  if (defined $opt_t);
808    $retval .= "-phi_pattern $opt_k "       if (defined $opt_k);
809    $retval .= "-out $opt_o "               if (defined $opt_o);
810    $retval .= "-out_ascii_pssm $opt_Q "    if (defined $opt_Q);
811    $retval .= "-in_msa $opt_B "            if (defined $opt_B);
812
813    if (defined $opt_m) {
814        if ($opt_m == 5 or $opt_m == 6) {
815            print STDERR "Warning: -m5 or -m6 formatting options ";
816            print STDERR "are not supported!\n";
817        }
818        $opt_m -= 2 if ($opt_m >= 7);
819        $retval .= "-outfmt $opt_m "
820    }
821    if (defined $opt_O) {
822        unless ($retval =~ s/-out \S+ /-out $opt_O /) {
823            $retval .= "-out $opt_O ";
824        }
825        unless ($retval =~ s/-outfmt \d+/-outfmt 8/) {
826            $retval .= "-outfmt 8 ";
827        } else {
828            print STDERR "Warning: overriding output format\n";
829        }
830    }
831    if (defined $opt_T and (length($opt_T) == 0 or $opt_T =~ /t/i)) {
832        $retval .= "-html "
833    }
834    if (defined $opt_I and (length($opt_I) == 0 or $opt_I =~ /t/i)) {
835        $retval .= "-show_gis ";
836    }
837    if (defined $opt_J and (length($opt_J) == 0 or $opt_J =~ /t/i)) {
838        $retval .= "-parse_deflines ";
839    }
840    if (defined $opt_s and (length($opt_s) == 0 or $opt_s =~ /t/i)) {
841        $retval .= "-use_sw_tback ";
842    }
843    if (defined $opt_U and (length($opt_U) == 0 or $opt_U =~ /t/i)) {
844        $retval .= "-lcase_masking ";
845    }
846    if (defined $opt_F) {
847        $retval .= &convert_filter_string($opt_F,
848                                          ($query_is_protein eq "1")
849                                          ? "blastp" : "blastn");
850    }
851
852    my $location = "";
853    $location .= $opt_S if (defined $opt_S);
854    if (defined $opt_H) {
855        if ($location eq "") {
856            $location = "0,$opt_H";
857        } else {
858            $location .= ",$opt_H";
859        }
860    }
861    if ($location ne "") {
862        $location .= ",-1" unless (defined $opt_H);
863        $retval .= &convert_sequence_locations($location, "query");
864    }
865
866    # Checkpoint file recovery
867    if (defined $opt_R) {
868        if (defined $opt_q and $opt_q ne "0") {
869            $retval .= "-in_pssm $opt_R "
870        } else {
871            die "ERROR: recovery from C toolkit checkpoint " .
872                "file format not supported\n";
873        }
874    }
875
876    # Checkpoint file saving
877    if (defined $opt_C) {
878        if (defined $opt_C and $opt_u ne "0") {
879            $retval .= "-out_pssm $opt_C "
880        } else {
881            die "ERROR: saving PSSM to C toolkit checkpoint " .
882                "file format not supported\n";
883        }
884    }
885
886    return $retval;
887}
888
# Tested: all conversions should work
#
# Converts the C toolkit bl2seq options found in @ARGV into the command line
# of the C++ toolkit program named by -p.
#
# Arguments:
#   $print_only - reference to the scalar that receives the --print_only flag
# Returns:
#   the converted command line as a single string
# Dies when -i, -j or -p is missing, or when a temporary input file needed to
# emulate -A cannot be created or written.
sub handle_bl2seq
{
    # Left untouched: a `use` inside a sub still imports into the whole
    # package at compile time, so narrowing it could affect other code.
    use File::Temp qw(:POSIX);

    my $print_only = shift;
    my $path = DEFAULT_PATH;
    my ($opt_A, $opt_D, $opt_E, $opt_F, $opt_G, $opt_I, $opt_J, $opt_M,
        $opt_S, $opt_T, $opt_U, $opt_V, $opt_W, $opt_X, $opt_Y, $opt_a,
        $opt_d, $opt_e, $opt_g, $opt_i, $opt_j, $opt_m, $opt_o, $opt_p,
        $opt_q, $opt_r, $opt_t);

    GetOptions("<>"             => sub { $application = shift; },
               "print_only!"    => $print_only,
               "path=s"         => \$path,
               "A:s"            => \$opt_A,
               "D=i"            => \$opt_D,
               "E=i"            => \$opt_E,
               "F=s"            => \$opt_F,
               "G=i"            => \$opt_G,
               "I=s"            => \$opt_I,
               "J=s"            => \$opt_J,
               "M=s"            => \$opt_M,
               "S=i"            => \$opt_S,
               "T:s"            => \$opt_T,
               "U:s"            => \$opt_U,
               "V:s"            => \$opt_V, # not handled, not applicable
               "W=i"            => \$opt_W,
               "X=i"            => \$opt_X,
               "Y=f"            => \$opt_Y,
               "a=s"            => \$opt_a,
               "d=f"            => \$opt_d,
               "e=f"            => \$opt_e,
               "g:s"            => \$opt_g,
               "i=s"            => \$opt_i,
               "j=s"            => \$opt_j,
               "m:s"            => \$opt_m,
               "o=s"            => \$opt_o,
               "p=s"            => \$opt_p,
               "q=i"            => \$opt_q,
               "r=i"            => \$opt_r,
               "t=i"            => \$opt_t
               );
    my $retval = $path;

    unless (defined $opt_i and defined $opt_j) {
        die "-i and -j are required in bl2seq\n";
    }

    if (defined $opt_p) {
        $retval .= "/$opt_p";
        $retval .= &add_exe_extension();
    } else {
        die "Program must be specified via the -p option\n";
    }

    # -A is a T/F flag like -T/-U/-m below: a bare -A counts as true, an
    # explicit false value must not trigger the emulation.
    my $inputs_are_ids = (defined $opt_A and
                          (length($opt_A) == 0 or $opt_A =~ /t/i));
    if ($inputs_are_ids) {
        # The -A option is not supported, so we create temporary files to
        # simulate it (example input: bl2seq -i129295 -j104501 -pblastp -A)
        my $query_fname = &_bl2seq_write_temp_input($opt_i);
        my $subj_fname = &_bl2seq_write_temp_input($opt_j);
        if (DEBUG) {
            print STDERR "Created temp. files $query_fname and $subj_fname\n";
        }

        $retval .= "-query $query_fname -subject $subj_fname ";
        if ($$print_only) {
            print STDERR "Warning: arguments to -query and -subject must be ";
            print STDERR "files containing the\narguments to bl2seq's -i and ";
            print STDERR "-j arguments respectively.\n";
        }
    } else {
        # -i and -j are guaranteed to be defined by the check above
        $retval .= "-query $opt_i ";
        $retval .= "-subject $opt_j ";
    }
    $retval .= "-out $opt_o "               if (defined $opt_o);
    if (defined $opt_a) {
        # -a names a SeqAlign output file: it takes over -out and forces
        # output format 8
        unless ($retval =~ s/-out \S+ /-out $opt_a /) {
            $retval .= "-out $opt_a ";
        }
        unless ($retval =~ s/-outfmt \d+/-outfmt 8/) {
            $retval .= "-outfmt 8 ";
        } else {
            print STDERR "Warning: overriding output format\n";
        }
    }
    if (defined $opt_D and $opt_D =~ /1/) {
        if ($retval =~ s/-outfmt \d+/-outfmt 7/) {
            print STDERR "Warning: overriding output format\n";
        } else {
            $retval .= "-outfmt 7 ";
        }
    }
    if (defined $opt_T and (length($opt_T) == 0 or $opt_T =~ /t/i)) {
        $retval .= "-html "
    }
    $retval .= "-evalue $opt_e "            if (defined $opt_e);
    $retval .= "-gapopen $opt_G "           if (defined $opt_G);
    $retval .= "-gapextend $opt_E "         if (defined $opt_E);
    $retval .= "-word_size $opt_W "         if (defined $opt_W);
    $retval .= "-matrix $opt_M "            if (defined $opt_M);
    $retval .= "-penalty $opt_q "           if (defined $opt_q);
    $retval .= "-reward $opt_r "            if (defined $opt_r);
    $retval .= &convert_strand($opt_S)      if (defined $opt_S);
    $retval .= "-max_intron_length $opt_t " if (defined $opt_t);
    $retval .= "-dbsize $opt_d "            if (defined $opt_d);
    $retval .= "-xdrop_gap $opt_X "         if (defined $opt_X);
    if (defined $opt_Y) {
        $retval .= "-searchsp " . &convert_float_to_int($opt_Y) . " ";
    }
    if (defined $opt_U and (length($opt_U) == 0 or $opt_U =~ /t/i)) {
        $retval .= "-lcase_masking ";
    }
    if (defined $opt_m and (length($opt_m) == 0 or $opt_m =~ /t/i)) {
        $retval .= "-task megablast ";
    }
    # -g is only acted upon when explicitly set to a false value
    if (defined $opt_g and $opt_g =~ /f/i) {
        $retval .= "-ungapped ";
    }
    $retval .= &convert_sequence_locations($opt_I, "query") if ($opt_I);
    $retval .= &convert_sequence_locations($opt_J, "subject") if ($opt_J);

    if (defined $opt_F) {
        $retval .= &convert_filter_string($opt_F, $opt_p);
    }

    return $retval;
}

# Writes $content into a newly created temporary file and returns the file
# name. The name is pushed onto @files2delete right after creation
# (presumably so the rest of the script can remove it later - confirm).
# Dies if the file cannot be created, written or closed.
sub _bl2seq_write_temp_input
{
    my $content = shift;

    # tempfile() creates and opens the file in one step, so there is no
    # window between choosing the name and opening it as with tmpnam().
    my ($fh, $fname) = File::Temp::tempfile();
    push @files2delete, $fname;

    print {$fh} $content or die "Failed to write $fname: $!\n";
    close($fh) or die "Failed to close $fname: $!\n";
    return $fname;
}
1023
# Translates a C toolkit rpsblast invocation (options read from @ARGV) into
# the matching C++ toolkit command line.
#
# Arguments:
#   $print_only - reference handed to GetOptions for the --print_only switch
# Returns:
#   the assembled command line string; the program is rpstblastn when -p is
#   given with a false value and rpsblast otherwise
sub handle_rpsblast
{
    my $print_only = shift;
    my $path = DEFAULT_PATH;
    my %opt;    # legacy single-letter option => value given on the command line

    GetOptions("<>"             => sub { $application = shift; },
               "print_only!"    => $print_only,
               "path=s"         => \$path,
               "F=s"            => \$opt{F},
               "I:s"            => \$opt{I},
               "J:s"            => \$opt{J},
               "L=s"            => \$opt{L},
               "N=f"            => \$opt{N},
               "O=s"            => \$opt{O},
               "P=i"            => \$opt{P},
               "T:s"            => \$opt{T},
               "U:s"            => \$opt{U},
               "V=s"            => \$opt{V},
               "X=i"            => \$opt{X},
               "Y=f"            => \$opt{Y},
               "Z=i"            => \$opt{Z},
               "a=i"            => \$opt{a},
               "b=i"            => \$opt{b},
               "d=s"            => \$opt{d},
               "e=f"            => \$opt{e},
               "i=s"            => \$opt{i},
               "l=s"            => \$opt{l},
               "m=i"            => \$opt{m},
               "o=s"            => \$opt{o},
               "p:s"            => \$opt{p},
               "v=i"            => \$opt{v},
               "y=f"            => \$opt{y},
               "z=f"            => \$opt{z}
               );

    # A T/F style switch is on when given without a value or with a value
    # containing "t" (case-insensitive).
    my $flag_is_set = sub {
        my $value = shift;
        return 0 unless (defined $value);
        return (length($value) == 0 or $value =~ /t/i);
    };

    my $program = "/rpsblast";
    $program = "/rpstblastn" if (defined $opt{p} and $opt{p} =~ /f/i);
    my $cmd = $path . $program;
    $cmd .= &add_exe_extension();

    $cmd .= "-query $opt{i} "               if (defined $opt{i});
    $cmd .= &create_db_argument($opt{d})    if (defined $opt{d});

    # Options that map one-to-one, listed in the order they are emitted
    my @direct = (
        [ "-evalue",               $opt{e} ],
        [ "-out",                  $opt{o} ],
        [ "-xdrop_ungap",          $opt{y} ],
        [ "-xdrop_gap",            $opt{X} ],
        [ "-min_raw_gapped_score", $opt{N} ],
        [ "-num_threads",          $opt{a} ],
        [ "-num_descriptions",     $opt{v} ],
        [ "-num_alignments",       $opt{b} ],
        [ "-dbsize",               $opt{z} ],
    );
    foreach my $pair (@direct) {
        my ($name, $value) = @$pair;
        next unless (defined $value);
        $cmd .= "$name $value ";
    }

    if (defined $opt{Y}) {
        $cmd .= "-searchsp " . &convert_float_to_int($opt{Y}) . " ";
    }
    $cmd .= "-xdrop_gap_final $opt{Z} "     if (defined $opt{Z});

    if (defined $opt{m}) {
        if ($opt{m} == 5 or $opt{m} == 6) {
            print STDERR "Warning: -m5 or -m6 formatting options " .
                         "are not supported!\n";
        }
        # legacy format numbers from 7 upwards are shifted down by two
        $opt{m} -= 2 if ($opt{m} >= 7);
        $cmd .= "-outfmt $opt{m} ";
    }

    if (defined $opt{O}) {
        # -O takes over any -out already present and forces output format 8
        my $replaced_out = ($cmd =~ s/-out \S+ /-out $opt{O} /);
        $cmd .= "-out $opt{O} " unless ($replaced_out);

        if ($cmd =~ s/-outfmt \d+/-outfmt 8/) {
            print STDERR "Warning: overriding output format\n";
        } else {
            $cmd .= "-outfmt 8 ";
        }
    }

    $cmd .= "-html "                        if ($flag_is_set->($opt{T}));
    $cmd .= "-window_size 0 "     if (defined $opt{P} and $opt{P} eq "1");
    $cmd .= &convert_filter_string($opt{F}, "blastp") if (defined $opt{F});
    $cmd .= "-show_gis "                    if ($flag_is_set->($opt{I}));
    $cmd .= "-parse_deflines "              if ($flag_is_set->($opt{J}));
    $cmd .= "-lcase_masking "               if ($flag_is_set->($opt{U}));

    if ($opt{L}) {
        $cmd .= &convert_sequence_locations($opt{L}, "query");
    }

    return $cmd;
}
1126
# Converts the C toolkit fastacmd options found in @ARGV into a blastdbcmd
# command line.
#
# Arguments:
#   $print_only - reference to the scalar that receives the --print_only flag
# Returns:
#   the converted command line as a single string
# Dies when -D is given a value other than 1, 2 or 3.
sub handle_fastacmd
{
    my $print_only = shift;
    my $path = DEFAULT_PATH;
    my ($opt_d, $opt_p, $opt_s, $opt_i, $opt_a, $opt_l, $opt_t, $opt_o,
        $opt_c, $opt_D, $opt_L, $opt_S, $opt_T, $opt_I, $opt_P);

    GetOptions("<>"             => sub { $application = shift; },
               "print_only!"    => $print_only,
               "path=s"         => \$path,
               "D=i"            => \$opt_D,
               "I:s"            => \$opt_I,
               "L=s"            => \$opt_L,
               "P=i"            => \$opt_P,
               "S=i"            => \$opt_S,
               "T:s"            => \$opt_T,
               "a:s"            => \$opt_a,
               "c:s"            => \$opt_c,
               "d=s"            => \$opt_d,
               "i=s"            => \$opt_i,
               "l=i"            => \$opt_l,
               "o=s"            => \$opt_o,
               "p=s"            => \$opt_p,
               "s=s"            => \$opt_s,
               "t:s"            => \$opt_t
               );

    my $retval = $path . "/blastdbcmd";
    $retval .= &add_exe_extension();
    $retval .= &create_db_argument($opt_d)  if (defined $opt_d);
    if (defined $opt_p) {
        $retval .= "-dbtype ";
        # fastacmd spells the molecule type T (protein), F (nucleotide) or
        # G (guess). Values containing "p" are still accepted as protein, as
        # before; the leading "t" test makes the documented T work too.
        if ($opt_p =~ /p/i or $opt_p =~ /^t/i) {
            $retval .= "prot ";
        } elsif ($opt_p =~ /f/i) {
            $retval .= "nucl ";
        } else {
            $retval .= "guess ";
        }
    }
    $retval .= "-entry $opt_s "             if (defined $opt_s);
    $retval .= "-entry_batch $opt_i "       if (defined $opt_i);
    $retval .= "-line_length $opt_l "       if (defined $opt_l);
    $retval .= "-out $opt_o "               if (defined $opt_o);
    $retval .= "-pig $opt_P "               if (defined $opt_P);
    if (defined $opt_D) {
        # -D dumps the whole database; its value selects the output format
        $retval .= "-entry all -outfmt ";
        if ($opt_D eq '1') {
            $retval .= "\%f ";
        } elsif ($opt_D eq '2') {
            $retval .= "\%g ";
        } elsif ($opt_D eq '3') {
            $retval .= "\%a ";
        } else {
            die "Invalid argument to -D\n";
        }
    }
    $retval .= &convert_sequence_locations($opt_L, "range") if ($opt_L);
    $retval .= &convert_strand($opt_S) if (defined $opt_S);
    # -T is a T/F flag like -I/-a/-t/-c below, so an explicit false value
    # must not request the taxonomy report.
    if (defined $opt_T and (length($opt_T) == 0 or $opt_T =~ /t/i)) {
        #print STDERR "Warning: -T option is not supported, please use " .
        #    "the -outfmt option to blastdbcmd with \%T, \%L, or \%S as an " .
        #    "argument\n";
        $retval .= "-outfmt \"NCBI Taxonomy id: \%T; Common name: \%L; ";
        $retval .= "Scientific name: \%S\" ";
    }
    if (defined $opt_I and (length($opt_I) == 0 or $opt_I =~ /t/i)) {
        $retval .= "-info ";
    }
    if (defined $opt_a and (length($opt_a) == 0 or $opt_a =~ /t/i)) {
        $retval .= "-get_dups ";
    }
    if (defined $opt_t and (length($opt_t) == 0 or $opt_t =~ /t/i)) {
        $retval .= "-target_only ";
    }
    if (defined $opt_c and (length($opt_c) == 0 or $opt_c =~ /t/i)) {
        $retval .= "-ctrl_a ";
    }
    return $retval;
}
1207
# Converts the C toolkit formatdb options found in @ARGV into a makeblastdb
# command line, or into a blastdb_aliastool command line when -L or -B is
# given.
#
# Arguments:
#   $print_only - reference to the scalar that receives the --print_only flag
# Returns:
#   the converted command line as a single string
# Dies when -L is given without -i or -F, or -B is given without -F.
# Prints a warning on STDERR for each formatdb option that has no equivalent
# (-a, -b, -e, -s, -V, -v).
sub handle_formatdb
{
    my $print_only = shift;
    my $path = DEFAULT_PATH;
    my ($opt_B, $opt_F, $opt_L, $opt_T, $opt_V, $opt_a, $opt_b, $opt_e, $opt_i,
        $opt_l, $opt_n, $opt_o, $opt_p, $opt_s, $opt_t, $opt_v);

    GetOptions("<>"             => sub { $application = shift; },
               "print_only!"    => $print_only,
               "path=s"         => \$path,
               "B=s"            => \$opt_B,
               "F=s"            => \$opt_F,
               "L=s"            => \$opt_L,
               "T=s"            => \$opt_T,
               "V:s"            => \$opt_V,
               "a:s"            => \$opt_a,
               "b:s"            => \$opt_b,
               "e:s"            => \$opt_e,
               "i=s"            => \$opt_i,
               "l=s"            => \$opt_l,
               "n=s"            => \$opt_n,
               "o:s"            => \$opt_o,
               "p:s"            => \$opt_p,
               "s:s"            => \$opt_s,
               "t=s"            => \$opt_t,
               "v=i"            => \$opt_v
               );

    my $retval = $path;
    if (defined $opt_L) {
        $retval .= "/blastdb_aliastool";
        die "-i is required\n" unless (defined $opt_i);
        die "-F is required\n" unless (defined $opt_F);
    } else {
        $retval .= "/makeblastdb";
    }
    $retval .= &add_exe_extension();

    # -B only converts a GI list file; nothing else applies, so the command
    # is rebuilt from scratch and returned immediately.
    if (defined $opt_B) {
        die "-F option must be specified with -B\n" unless (defined $opt_F);
        $retval = $path . "/blastdb_aliastool";
        $retval .= &add_exe_extension();
        $retval .= "-gi_file_in $opt_F -gi_file_out $opt_B";
        return $retval;
    }

    $retval .= "-title \"$opt_t\" "         if (defined $opt_t);
    # NOTE(review): no -dbtype is emitted when -p is absent; confirm that the
    # target program's default matches what formatdb assumed in that case.
    if (defined $opt_p) {
        $retval .= "-dbtype ";
        if ((length($opt_p) == 0 or $opt_p =~ /t/i)) {
            $retval .= "prot ";
        } else {
            $retval .= "nucl ";
        }
    }
    if ($retval =~ /blastdb_aliastool/) {
        $retval .= "-out $opt_L "               if (defined $opt_L);
        if (defined $opt_i and not defined $opt_n) {
            $retval .= &create_db_argument($opt_i);
        }
        # there's no -n in blastdb_aliastool, as we copy the argument value
        # verbatim into the DBLIST field of the alias file, so we make
        # formatdb's -n option tool override -i
        $retval .= &create_db_argument($opt_n)  if (defined $opt_n);
    } else {
        $retval .= "-out $opt_n "               if (defined $opt_n);
        $retval .= "-in $opt_i "                if (defined $opt_i);
    }
    $retval .= "-gilist $opt_F "            if (defined $opt_F);
    $retval .= "-logfile $opt_l "           if (defined $opt_l);
    # makeblastdb spells this option with an underscore
    $retval .= "-taxid_map $opt_T "         if (defined $opt_T);

    if (defined $opt_o and (length($opt_o) == 0 or $opt_o =~ /t/i)) {
        $retval .= "-parse_seqids ";
    }
    if (defined $opt_a) {
        print STDERR "Warning: -a option is not supported\n";
    }
    if (defined $opt_b) {
        print STDERR "Warning: -b option is not supported\n";
    }
    if (defined $opt_e) {
        print STDERR "Warning: -e option is not supported\n";
    }
    if (defined $opt_s) {
        print STDERR "Warning: -s option is not supported\n";
    }
    if (defined $opt_V) {
        print STDERR "Warning: -V option is not supported\n";
    }
    if (defined $opt_v) {
        print STDERR "Warning: -v option is not supported, please use " .
            "the -max_file_sz option to makeblastdb\n";
    }
    return $retval;
}
1304__END__
1305
1306=head1 NAME
1307
1308B<legacy_blast.pl> - Convert BLAST command line invocations from NCBI C
1309toolkit's implementation to NCBI C++ toolkit's implementation.
1310
1311=head1 SYNOPSIS
1312
1313legacy_blast.pl <C toolkit command line program and arguments> [--print_only]
1314[--path /path/to/binaries]
1315legacy_blast.pl [--version]
1316legacy_blast.pl [--help]
1317
1318=head1 OPTIONS
1319
1320=over 2
1321
1322=item B<--path>
1323
1324Use the provided path as the location of the BLAST binaries to execute/print
1325(default: /usr/bin).
1326
1327=item B<--print_only>
1328
1329Print the equivalent command line option instead of running the command
1330(default: false).
1331
1332=item B<--version>
1333
1334Prints this script's version. Must be invoked as the first and only argument to
1335this script.
1336
1337=back
1338
1339=head1 DESCRIPTION
1340
This script converts and runs the equivalent NCBI C toolkit command line BLAST
program and arguments provided to it (whenever possible) to NCBI C++ toolkit
BLAST programs. Note that to specify options to this script they B<MUST> use 2
dashes to prefix the options B<AND> be listed at the end of the command line
invocation to convert.
1346
1347=head1 EXIT CODES
1348
1349This script returns 0 on success and a non-zero value on errors.
1350
1351=head1 BUGS
1352
1353Please report them to <blast-help@ncbi.nlm.nih.gov>
1354
1355=head1 COPYRIGHT
1356
1357See PUBLIC DOMAIN NOTICE included at the top of this script.
1358
1359=cut
1360