1#!/usr/local/bin/perl 2# $Id: legacy_blast.pl 608983 2020-05-26 15:41:33Z camacho $ 3# =========================================================================== 4# 5# PUBLIC DOMAIN NOTICE 6# National Center for Biotechnology Information 7# 8# This software/database is a "United States Government Work" under the 9# terms of the United States Copyright Act. It was written as part of 10# the author's official duties as a United States Government employee and 11# thus cannot be copyrighted. This software/database is freely available 12# to the public for use. The National Library of Medicine and the U.S. 13# Government have not placed any restriction on its use or reproduction. 14# 15# Although all reasonable efforts have been taken to ensure the accuracy 16# and reliability of the software and data, the NLM and the U.S. 17# Government do not and cannot warrant the performance or results that 18# may be obtained by using this software or data. The NLM and the U.S. 19# Government disclaim all warranties, express or implied, including 20# warranties of performance, merchantability or fitness for any particular 21# purpose. 22# 23# Please cite the author in any work or product based on this material. 
#
# ===========================================================================
#
# Author:  Christiam Camacho
#
# File Description:
#   Script to convert NCBI C toolkit command line program and arguments into
#   NCBI C++ toolkit command line program and arguments for the BLAST suite of
#   programs
#
# ===========================================================================

use strict;
use warnings;
use Getopt::Long qw(:config no_ignore_case bundling no_auto_abbrev);
use Pod::Usage;

use constant DEBUG => 0;
# Default PATH where binaries will be found
use constant DEFAULT_PATH => "/usr/bin";

pod2usage({-exitval => 1, -verbose => 2}) if (@ARGV == 0);

my $application = shift;
my $print_only = "0"; # Determines whether script prints or runs the command
# This array will contain file names to delete that are created with bl2seq's
# -A option
my @files2delete;

# Remove the temporary files no matter how the script terminates (normal
# exit, die in one of the handlers, or failure of the converted command)
END { unlink @files2delete if (@files2delete); }

# Maps each supported C toolkit application to the subroutine that builds the
# equivalent C++ toolkit command line
my %handler_for = (
    "blastall"  => \&handle_blastall,
    "megablast" => \&handle_megablast,
    "blastpgp"  => \&handle_blastpgp,
    "bl2seq"    => \&handle_bl2seq,
    "rpsblast"  => \&handle_rpsblast,
    "fastacmd"  => \&handle_fastacmd,
    "formatdb"  => \&handle_formatdb,
    "seedtop"   => \&handle_seedtop,
);

my $cmd;
if (exists $handler_for{$application}) {
    $cmd = $handler_for{$application}->(\$print_only);
} elsif ($application =~ /version/) {
    my $revision = '$Revision: 608983 $';
    $revision =~ s/\$Revision: | \$//g;
    print "$0 version $revision\n";
} elsif ($application =~ /help/) {
    pod2usage({-exitval => 1, -verbose => 2});
} else {
    die "Application: '$application' is not supported\n";
}

# $cmd stays undefined when only the version was requested
if (defined $cmd) {
    if ($print_only) {
        print "$cmd\n";
    } else {
        print STDERR "$cmd\n" if (DEBUG);
        my $rv = system($cmd);
        unless ($rv == 0) {
            die "Program failed, try executing the command manually.\n";
        }
    }
}

# Builds the "-db" argument for the given database name(s).
# Only add quotation marks in case there are spaces in the database argument
sub create_db_argument($)
{
    my $arg = shift;
    my $retval = "-db ";
    $retval .= ( ($arg =~ /\s/) ? "\"$arg\" " : "$arg ");
    return $retval;
}

# Converts floating point numbers (e.g.: "1.5e+3", "2e9", "42.7") to integers.
# The value is truncated towards zero and returned as a string of digits, so
# that large values are never printed in exponential notation.
sub convert_float_to_int($)
{
    my $float_arg = shift;
    # Rely on perl's numeric conversion, which understands the mantissa's
    # fractional part as well as signed and unsigned exponents
    return sprintf("%.0f", int($float_arg));
}

# Add the .exe extension for binaries if necessary on windows. The returned
# string always ends with the space that separates the binary from its
# arguments.
sub add_exe_extension()
{
    return ($^O =~ /mswin|cygwin/i) ? ".exe " : " ";
}

# Converts a C toolkit sequence location ("start stop", "start;stop" or
# "start,stop") into the C++ toolkit "start-stop" notation.
# Arguments: the location string and its target: "query", "range" or anything
# else for the subject.
# Returns the converted argument, or an empty string if the location is
# missing or malformed.
sub convert_sequence_locations($$)
{
    my $arg = shift;
    my $target = shift;
    my $retval = "";
    return $retval unless (defined $arg);

    my @fields = split(/[ ;,]/, $arg);
    unless (@fields >= 2) {
        print STDERR "Warning: ignoring malformed sequence location '$arg'\n";
        return $retval;
    }

    if ($target eq "query") {
        $retval .= "-query_loc ";
    } elsif ($target eq "range") {
        $retval .= "-range ";
    } else {
        $retval .= "-subject_loc ";
    }
    $retval .= "$fields[0]-$fields[1] ";
    return $retval;
}

# Converts the C toolkit filter string (-F option) into the equivalent C++
# toolkit filtering arguments.
# Arguments: the filter string and the BLAST program name, which determines
# whether SEG (protein comparisons) or DUST (nucleotide comparisons) applies.
sub convert_filter_string($$)
{
    my $filter_string = shift;
    my $program = shift;

    #print STDERR "Parsing '$filter_string'\n";

    # Programs whose sequences are filtered with SEG; all others use DUST
    my $uses_seg = ($program eq "blastp" or $program eq "tblastn" or
                    $program eq "blastx" or $program eq "tblastx");

    # Filtering explicitly disabled
    if ($filter_string =~ /F/) {
        return $uses_seg ? "-seg no " : "-dust no ";
    }

    my $retval = "";
    if ($filter_string =~ /S (\d+) (\S+) (\S+)/) {
        $retval .= "-seg '$1 $2 $3' ";
    }
    if ($filter_string =~ /D (\d+) (\d+) (\d+)/) {
        $retval .= "-dust '$1 $2 $3' ";
    }
    if ($filter_string =~ /R -d (\S+)/) {
        $retval .= "-filtering_db $1 ";
    } elsif ($filter_string =~ /R\s*;/) {
        $retval .= "-filtering_db repeat/repeat_9606 ";
    }

    # Filtering requested without explicit parameters: use the defaults
    if ($filter_string =~ /L|T|S|D/ and not ($retval =~ /seg|dust/)) {
        $retval .= $uses_seg ? "-seg yes " : "-dust yes ";
    }

    if ($filter_string =~ /m/) {
        $retval .= "-soft_masking true ";
    }
    #print STDERR "returning '$retval'\n";
    return $retval;
}

# Converts the numeric C toolkit strand argument (1: plus, 2: minus, anything
# else: both) into the "-strand" argument
sub convert_strand($)
{
    my $old_strand_arg = shift;
    my $retval = "-strand ";
    if ($old_strand_arg == 1) {
        $retval .= "plus ";
    } elsif ($old_strand_arg == 2) {
        $retval .= "minus ";
    } else {
        $retval .= "both ";
    }
    return $retval;
}

# Handle the conversion from blastall arguments to the corresponding C++
# binaries.
# Argument: reference to the print_only flag, set via --print_only.
# Returns the converted command line; dies if -p is not provided.
sub handle_blastall($)
{
    my $print_only = shift;
    my $path = DEFAULT_PATH;
    my ($opt_A, $opt_B, $opt_C, $opt_D, $opt_E, $opt_F, $opt_G, $opt_I, $opt_J,
        $opt_K, $opt_L, $opt_M, $opt_O, $opt_P, $opt_Q, $opt_R, $opt_S, $opt_T,
        $opt_U, $opt_V, $opt_W, $opt_X, $opt_Y, $opt_Z, $opt_a, $opt_b, $opt_d,
        $opt_e, $opt_f, $opt_g, $opt_i, $opt_l, $opt_m, $opt_n, $opt_o, $opt_p,
        $opt_q, $opt_r, $opt_s, $opt_t, $opt_v, $opt_w, $opt_y, $opt_z);

    GetOptions("<>"             => sub { $application = shift; },
               "print_only!"    => $print_only,
               "path=s"         => \$path,
               "A=i"            => \$opt_A,
               "B=i"            => \$opt_B, # not handled, not applicable
               "C=s"            => \$opt_C,
               "D=i"            => \$opt_D,
               "E=i"            => \$opt_E,
               "F=s"            => \$opt_F,
               "G=i"            => \$opt_G,
               "I:s"            => \$opt_I,
               "J:s"            => \$opt_J,
               "K=i"            => \$opt_K,
               "L=s"            => \$opt_L,
               "M=s"            => \$opt_M,
               "O=s"            => \$opt_O,
               "P=i"            => \$opt_P,
               "Q=i"            => \$opt_Q,
               "R=s"            => \$opt_R,
               "S=i"            => \$opt_S,
               "T:s"            => \$opt_T,
               "U:s"            => \$opt_U,
               "V:s"            => \$opt_V, # not handled, not applicable
               "W=i"            => \$opt_W,
               "X=i"            => \$opt_X,
               "Y=f"            => \$opt_Y,
               "Z=i"            => \$opt_Z,
               "a=i"            => \$opt_a,
               "b=i"            => \$opt_b,
               "d=s"            => \$opt_d,
               "e=f"            => \$opt_e,
               "f=i"            => \$opt_f,
               "g:s"            => \$opt_g,
               "i=s"            => \$opt_i,
               "l=s"            => \$opt_l,
               "m=i"            => \$opt_m,
               "n:s"            => \$opt_n,
               "o=s"            => \$opt_o,
               "p=s"            => \$opt_p,
               "q=i"            => \$opt_q,
               "r=i"            => \$opt_r,
               "s:s"            => \$opt_s,
               "t=i"            => \$opt_t,
               "v=i"            => \$opt_v,
               "w=i"            => \$opt_w,
               "y=f"            => \$opt_y,
               "z=f"            => \$opt_z,
               );

    unless (defined $opt_p) {
        die "-p must be provided\n";
    }

    # Select the binary: -R implies PSI-TBLASTN, -n T implies megablast
    my $retval = $path;
    if (defined $opt_R) {
        $retval .= "/tblastn";
        $retval .= &add_exe_extension();
        $retval .= "-in_pssm $opt_R ";
    } elsif (defined $opt_n and $opt_n =~ /t/i) {
        $retval .= "/blastn";
        $retval .= &add_exe_extension();
        $retval .= "-task megablast ";
    } else {
        $retval .= "/$opt_p";
        $retval .= &add_exe_extension();
        $retval .= "-task blastn " if ($opt_p eq "blastn");
    }
    $retval .= &create_db_argument($opt_d)      if (defined $opt_d);
    # The query is not passed along when searching with a PSSM (-R)
    unless (($retval =~ /\/tblastn/) and defined $opt_R) {
        $retval .= "-query $opt_i "             if (defined $opt_i);
    }
    $retval .= "-gilist $opt_l "                if (defined $opt_l);
    $retval .= "-dbsize $opt_z "                if (defined $opt_z);
    $retval .= "-matrix $opt_M "                if (defined $opt_M);
    $retval .= "-evalue $opt_e "                if (defined $opt_e);
    $retval .= "-gapopen $opt_G "               if (defined $opt_G);
    $retval .= "-gapextend $opt_E "             if (defined $opt_E);
    $retval .= "-xdrop_ungap $opt_y "           if (defined $opt_y);
    $retval .= "-xdrop_gap $opt_X "             if (defined $opt_X);
    $retval .= "-xdrop_gap_final $opt_Z "       if (defined $opt_Z);
    $retval .= "-num_threads $opt_a "           if (defined $opt_a);
    if (defined $opt_A) {
        if (defined $opt_P and $opt_P ne "0") {
            print STDERR "Warning: ignoring -P because window size is set\n";
        }
        $retval .= "-window_size $opt_A "
    }
    if (defined $opt_P and $opt_P eq "1" and (not defined $opt_A)) {
        $retval .= "-window_size 0 ";
    }
    $retval .= "-word_size $opt_W "             if (defined $opt_W);
    if (defined $opt_Y) {
        $retval .= "-searchsp " . &convert_float_to_int($opt_Y) . " ";
    }
    if (defined $opt_f) {
        if ($opt_p eq "blastn") {
            print STDERR "Warning: -f is not supported for blastn\n";
        } else {
            $retval .= "-min_word_score $opt_f "
        }
    }
    if (defined $opt_I and (length($opt_I) == 0 or $opt_I =~ /t/i)) {
        $retval .= "-show_gis ";
    }
    $retval .= "-num_descriptions $opt_v "      if (defined $opt_v);
    $retval .= "-num_alignments $opt_b "        if (defined $opt_b);
    $retval .= "-query_gencode $opt_Q "         if (defined $opt_Q);
    $retval .= "-db_gencode $opt_D "            if (defined $opt_D);
    $retval .= "-penalty $opt_q "               if (defined $opt_q);
    $retval .= "-reward $opt_r "                if (defined $opt_r);
    $retval .= "-culling_limit $opt_K "         if (defined $opt_K);
    $retval .= "-max_intron_length $opt_t "     if (defined $opt_t);
    $retval .= "-frame_shift_penalty $opt_w "   if (defined $opt_w);
    $retval .= "-comp_based_stats $opt_C "      if (defined $opt_C);
    $retval .= "-out $opt_o "                   if (defined $opt_o);
    if (defined $opt_m) {
        if ($opt_m == 5 or $opt_m == 6) {
            print STDERR "Warning: -m5 or -m6 formatting options ";
            print STDERR "are not supported!\n";
        }
        # -m5 and -m6 have no equivalent, so later formats shift down by 2
        $opt_m -= 2 if ($opt_m >= 7);
        $retval .= "-outfmt $opt_m "
    }
    # -O (ASN.1 output file) overrides any output file/format set above
    if (defined $opt_O) {
        unless ($retval =~ s/-out \S+ /-out $opt_O /) {
            $retval .= "-out $opt_O ";
        }
        if ($retval =~ s/-outfmt \d+/-outfmt 8/) {
            print STDERR "Warning: overriding output format\n";
        } else {
            $retval .= "-outfmt 8 ";
        }
    }
    if (defined $opt_T and (length($opt_T) == 0 or $opt_T =~ /t/i)) {
        $retval .= "-html "
    }

    $retval .= &convert_sequence_locations($opt_L, "query") if ($opt_L);
    if (defined $opt_U and (length($opt_U) == 0 or $opt_U =~ /t/i)) {
        $retval .= "-lcase_masking ";
    }
    if (defined $opt_g and $opt_g =~ /f/i) {
        $retval .= "-ungapped ";
    }
    if (defined $opt_J and (length($opt_J) == 0 or $opt_J =~ /t/i)) {
        $retval .= "-parse_deflines ";
    }
    # The strand only applies to nucleotide queries, i.e.: neither blastp nor
    # tblastn (which is also what runs when -R is provided)
    if (defined $opt_S and not defined $opt_R and
        $opt_p ne "blastp" and $opt_p ne "tblastn") {
        $retval .= &convert_strand($opt_S);
    }
    if (defined $opt_s and (length($opt_s) == 0 or $opt_s =~ /t/i)) {
        $retval .= "-use_sw_tback ";
    }

    if (defined $opt_F) {
        $retval .= &convert_filter_string($opt_F, $opt_p);
    } elsif ($opt_p eq "blastp") {
        # blastall filtered blastp queries by default
        $retval .= &convert_filter_string("T", $opt_p);
    }

    return $retval;
}

# Handle the conversion from seedtop arguments to the corresponding psiblast
# command line.
# Argument: reference to the print_only flag, set via --print_only.
# Returns the converted command line; dies unless -p patseedp is provided.
sub handle_seedtop($)
{
    my $print_only = shift;
    my $path = DEFAULT_PATH;
    my ($opt_C, $opt_D, $opt_E, $opt_F, $opt_G, $opt_I, $opt_J, $opt_K, $opt_M,
        $opt_O, $opt_S, $opt_X, $opt_d, $opt_e, $opt_f, $opt_i, $opt_k, $opt_o,
        $opt_p, $opt_q, $opt_r);

    GetOptions("<>"             => sub { $application = shift; },
               "print_only!"    => $print_only,
               "path=s"         => \$path,
               "C=i"            => \$opt_C,
               "D=i"            => \$opt_D,
               "E=i"            => \$opt_E,
               "F:s"            => \$opt_F,
               "G=i"            => \$opt_G,
               "I:s"            => \$opt_I,
               "J:s"            => \$opt_J,
               "K=i"            => \$opt_K,
               "M=s"            => \$opt_M,
               "O=s"            => \$opt_O,
               "S=i"            => \$opt_S,
               "X=i"            => \$opt_X,
               "d=s"            => \$opt_d,
               "e=f"            => \$opt_e,
               "f:s"            => \$opt_f,
               "i=s"            => \$opt_i,
               "k=s"            => \$opt_k,
               "o=s"            => \$opt_o,
               "p=s"            => \$opt_p,
               "q=i"            => \$opt_q,
               "r=i"            => \$opt_r
               );

    my $retval = $path;
    $retval .= "/psiblast";
    $retval .= &add_exe_extension();
    $retval .= "-query $opt_i "                 if (defined $opt_i);
    $retval .= "-phi_pattern $opt_k "           if (defined $opt_k);
    $retval .= &create_db_argument($opt_d)      if (defined $opt_d);
    $retval .= "-gapopen $opt_G "               if (defined $opt_G);
    $retval .= "-gapextend $opt_E "             if (defined $opt_E);
    $retval .= "-out $opt_o "                   if (defined $opt_o);
    # -O (ASN.1 output file) overrides any output file/format set above
    if (defined $opt_O) {
        unless ($retval =~ s/-out \S+ /-out $opt_O /) {
            $retval .= "-out $opt_O ";
        }
        if ($retval =~ s/-outfmt \d+/-outfmt 8/) {
            print STDERR "Warning: overriding output format\n";
        } else {
            $retval .= "-outfmt 8 ";
        }
    }
    if (defined $opt_I and (length($opt_I) == 0 or $opt_I =~ /t/i)) {
        $retval .= "-show_gis ";
    }
    if (defined $opt_J and (length($opt_J) == 0 or $opt_J =~ /t/i)) {
        $retval .= "-parse_deflines ";
    }
    $retval .= "-xdrop_gap $opt_X "             if (defined $opt_X);
    $retval .= "-evalue $opt_e "                if (defined $opt_e);
    $retval .= "-matrix $opt_M "                if (defined $opt_M);
    my $query_is_protein = "1";
    if (defined $opt_p) {
        unless ($opt_p eq "patseedp") {
            die "Only patseedp program is supported\n";
        }
        # Change query_is_protein if other programs are supported
    } else {
        die "Program must be specified\n";
    }
    if (defined $opt_F) {
        $retval .= &convert_filter_string($opt_F,
                                          ($query_is_protein eq "1")
                                          ? "blastp" : "blastn");
    }

    # Unsupported options
    if (defined $opt_D) {
        print STDERR "Warning: -D option is not supported!\n";
    }
    if (defined $opt_S) {
        print STDERR "Warning: -S option is not supported!\n";
    }
    if (defined $opt_C) {
        print STDERR "Warning: -C option is not supported!\n";
    }
    if (defined $opt_q) {
        print STDERR "Warning: -q option is not supported!\n";
    }
    if (defined $opt_r) {
        print STDERR "Warning: -r option is not supported!\n";
    }
    if (defined $opt_f) {
        print STDERR "Warning: -f option is not supported!\n";
    }
    if (defined $opt_K) {
        print STDERR "Warning: -K option is not supported!\n";
    }
    return $retval;
}

# Handle the conversion from megablast arguments to the corresponding blastn
# command line.
# Argument: reference to the print_only flag, set via --print_only.
# Returns the converted command line.
sub handle_megablast($)
{
    my $print_only = shift;
    my $path = DEFAULT_PATH;
    my ($opt_A, $opt_D, $opt_E, $opt_F, $opt_G, $opt_H, $opt_I, $opt_J,
        $opt_L, $opt_M, $opt_N, $opt_O, $opt_P, $opt_Q, $opt_R, $opt_S,
        $opt_T, $opt_U, $opt_V, $opt_W, $opt_X, $opt_Y, $opt_Z, $opt_a,
        $opt_b, $opt_d, $opt_e, $opt_f, $opt_g, $opt_i, $opt_l, $opt_m,
        $opt_n, $opt_o, $opt_p, $opt_q, $opt_r, $opt_s, $opt_t, $opt_v,
        $opt_y, $opt_z);

    GetOptions("<>"             => sub { $application = shift; },
               "print_only!"    => $print_only,
               "path=s"         => \$path,
               "A=i"            => \$opt_A,
               "D=i"            => \$opt_D,
               "E=i"            => \$opt_E,
               "F=s"            => \$opt_F,
               "G=i"            => \$opt_G,
               "H=i"            => \$opt_H,
               "I:s"            => \$opt_I,
               "J:s"            => \$opt_J,
               "L=s"            => \$opt_L,
               "M=i"            => \$opt_M,
               "N=i"            => \$opt_N,
               "O=s"            => \$opt_O,
               "P=i"            => \$opt_P, # no equivalent in new engine
               "Q=s"            => \$opt_Q,
               "R:s"            => \$opt_R,
               "S=i"            => \$opt_S,
               "T:s"            => \$opt_T,
               "U:s"            => \$opt_U,
               "V:s"            => \$opt_V, # not handled, not applicable
               "W=i"            => \$opt_W,
               "X=i"            => \$opt_X,
               "Y=f"            => \$opt_Y,
               "Z=i"            => \$opt_Z,
               "a=i"            => \$opt_a,
               "b=i"            => \$opt_b,
               "d=s"            => \$opt_d,
               "e=f"            => \$opt_e,
               "f:s"            => \$opt_f,
               "g:s"            => \$opt_g,
               "i=s"            => \$opt_i,
               "l=s"            => \$opt_l,
               "m=i"            => \$opt_m,
               "n:s"            => \$opt_n,
               "o=s"            => \$opt_o,
               "p=f"            => \$opt_p,
               "q=i"            => \$opt_q,
               "r=i"            => \$opt_r,
               "s=i"            => \$opt_s,
               "t=i"            => \$opt_t,
               "v=i"            => \$opt_v,
               "y=i"            => \$opt_y,
               "z=f"            => \$opt_z
               );
    my $retval = $path;

    $retval .= "/blastn";
    $retval .= &add_exe_extension();
    $retval .= "-query $opt_i "                 if (defined $opt_i);
    $retval .= &create_db_argument($opt_d)      if (defined $opt_d);
    $retval .= "-evalue $opt_e "                if (defined $opt_e);
    $retval .= "-xdrop_gap $opt_X "             if (defined $opt_X);
    $retval .= "-gilist $opt_l "                if (defined $opt_l);
    $retval .= "-penalty $opt_q "               if (defined $opt_q);
    $retval .= "-reward $opt_r "                if (defined $opt_r);
    $retval .= "-gapopen $opt_G "               if (defined $opt_G);
    $retval .= "-gapextend $opt_E "             if (defined $opt_E);
    $retval .= "-out $opt_o "                   if (defined $opt_o);
    if (defined $opt_m) {
        if ($opt_m == 5 or $opt_m == 6) {
            print STDERR "Warning: -m5 or -m6 formatting options ";
            print STDERR "are not supported!\n";
        }
        # -m5 and -m6 have no equivalent, so later formats shift down by 2
        $opt_m -= 2 if ($opt_m >= 7);
        $retval .= "-outfmt $opt_m "
    }
    # -O (ASN.1 output file) overrides any output file/format set above
    if (defined $opt_O) {
        unless ($retval =~ s/-out \S+ /-out $opt_O /) {
            $retval .= "-out $opt_O ";
        }
        if ($retval =~ s/-outfmt \d+/-outfmt 8/) {
            print STDERR "Warning: overriding output format\n";
        } else {
            $retval .= "-outfmt 8 ";
        }
    }
    if (defined $opt_T and (length($opt_T) == 0 or $opt_T =~ /t/i)) {
        $retval .= "-html "
    }
    $retval .= "-num_descriptions $opt_v "      if (defined $opt_v);
    $retval .= "-num_alignments $opt_b "        if (defined $opt_b);
    $retval .= "-num_threads $opt_a "           if (defined $opt_a);
    $retval .= "-word_size $opt_W "             if (defined $opt_W);
    $retval .= "-dbsize $opt_z "                if (defined $opt_z);
    if (defined $opt_Y) {
        $retval .= "-searchsp " . &convert_float_to_int($opt_Y) . " ";
    }
    $retval .= "-xdrop_ungap $opt_y "           if (defined $opt_y);
    $retval .= "-xdrop_gap_final $opt_Z "       if (defined $opt_Z);
    if (defined $opt_t) {
        $retval .= "-template_length $opt_t ";
        # Set the template type to the default value in megablast if not
        # provided, as blastn requires it
        $opt_N = 0 unless (defined $opt_N);
    }
    $retval .= "-window_size $opt_A "           if (defined $opt_A);
    if (defined $opt_N) {
        $retval .= "-template_type coding "             if ($opt_N == 0);
        $retval .= "-template_type optimal "            if ($opt_N == 1);
        $retval .= "-template_type coding_and_optimal " if ($opt_N == 2);
    }
    if (defined $opt_F) {
        $retval .= &convert_filter_string($opt_F, "blastn");
    }
    if (defined $opt_J and (length($opt_J) == 0 or $opt_J =~ /t/i)) {
        $retval .= "-parse_deflines ";
    }

    $retval .= "-perc_identity $opt_p "         if (defined $opt_p);
    $retval .= "-min_raw_gapped_score $opt_s "  if (defined $opt_s);
    $retval .= &convert_strand($opt_S)          if (defined $opt_S);
    $retval .= &convert_sequence_locations($opt_L, "query") if ($opt_L);
    if (defined $opt_U and (length($opt_U) == 0 or $opt_U =~ /t/i)) {
        $retval .= "-lcase_masking ";
    }
    if (defined $opt_n and (length($opt_n) == 0 or $opt_n =~ /t/i)) {
        $retval .= "-no_greedy ";
    }

    # Unsupported options
    # This option can be safely ignored
    #if (defined $opt_M) {
    #    print STDERR "Warning: -M option is ignored\n";
    #}

    my $tab_with_acc =
        "-outfmt \"7 qacc sseqid pident length mismatch gapopen qstart qend " .
        "sstart send evalue bitscore\" ";

    # Here are some combinations of options and their equivalent conversion to
    # the -outfmt option:
    # NOTE: only in the last case we use sgi as the user explicitly requests
    # the GIs to be shown (via -I), thus we assume the database/subjects will
    # have GIs. We don't do the same for accessions, because if these are not
    # available, an ordinal ID gets printed.
    # -J -D3 -R -fF = -outfmt "7 qacc sseqid pident length mismatch gapopen
    #                          qstart qend sstart send evalue bitscore"
    # -J -D3 -R -fT = -outfmt "7 qseqid sseqid pident length mismatch gapopen
    #                          qstart qend sstart send evalue bitscore"
    # -J -D3 -R -fT -I = -outfmt "7 qgi sgi pident length mismatch gapopen
    #                             qstart qend sstart send evalue bitscore"

    if (defined $opt_D) {
        if ($opt_D == 3) {          # tabular output
            if ($retval =~ s/-outfmt \d+/$tab_with_acc/) {
                print STDERR "Warning: overriding output format\n";
            } else {
                $retval .= "$tab_with_acc ";
            }
        } elsif ($opt_D == 2) {     # traditional BLAST output
            if ($retval =~ s/-outfmt \d+/-outfmt 0/) {
                print STDERR "Warning: overriding output format\n";
            } else {
                $retval .= "-outfmt 0 ";
            }
        } elsif ($opt_D == 4) {     # text ASN.1
            if ($retval =~ s/-outfmt \d+/-outfmt 8/) {
                print STDERR "Warning: overriding output format\n";
            } else {
                $retval .= "-outfmt 8 ";
            }
        } elsif ($opt_D == 5) {     # binary ASN.1
            if ($retval =~ s/-outfmt \d+/-outfmt 9/) {
                print STDERR "Warning: overriding output format\n";
            } else {
                $retval .= "-outfmt 9 ";
            }
        } else {
            print STDERR "Warning: -D option with value $opt_D is not " .
                "supported!\n";
        }
    }

    if (defined $opt_I and (length($opt_I) == 0 or $opt_I =~ /t/i)) {
        $retval .= "-show_gis ";
        $retval =~ s/qacc/qgi/;
        $retval =~ s/sseqid/sgi/;
    }
    # -fF is the default, if -f or -fT is specified, we assume that's what's
    # desired and we apply a modification to the previously set output format
    # (we can safely assume this b/c -f only works with -D3)
    if (defined $opt_f and (length($opt_f) == 0 or $opt_f =~ /t/i)) {
        $retval =~ s/qacc/qseqid/;
    }
    if (defined $opt_R and not ($retval =~ /-outfmt.*7/)) {
        print STDERR "Warning: -R option is deprecated, please rely on the ".
            "application's exit code to determine its success or failure.\n" .
            "0 means success, non-zero means failure\n";
    }
    # Deprecated options
    if (defined $opt_g and $opt_g =~ /f/i) {
        print STDERR "Warning: -g option is not supported!\n";
    }
    if (defined $opt_H) {
        print STDERR "Warning -H option is not supported!\n";
    }
    if (defined $opt_Q) {
        print STDERR "Warning: -Q option is deprecated\n";
    }
    if (defined $opt_P) {
        print STDERR "Warning: -P option is deprecated\n";
    }

    return $retval;
}

# Handle the conversion from blastpgp arguments to the corresponding psiblast
# command line.
# Argument: reference to the print_only flag, set via --print_only.
# Returns the converted command line; dies if an unsupported program (-p) or
# the C toolkit checkpoint file format (-R without -q, -C without -u) is
# requested.
sub handle_blastpgp($)
{
    my $print_only = shift;
    my $path = DEFAULT_PATH;
    my ($opt_A, $opt_B, $opt_C, $opt_E, $opt_F, $opt_G, $opt_H, $opt_I,
        $opt_J, $opt_K, $opt_L, $opt_M, $opt_N, $opt_O, $opt_P, $opt_Q,
        $opt_R, $opt_S, $opt_T, $opt_U, $opt_W, $opt_X, $opt_Y, $opt_Z,
        $opt_a, $opt_b, $opt_c, $opt_d, $opt_e, $opt_f, $opt_h, $opt_i,
        $opt_j, $opt_k, $opt_l, $opt_m, $opt_o, $opt_p, $opt_q, $opt_s,
        $opt_t, $opt_u, $opt_v, $opt_y, $opt_z);

    GetOptions("<>"             => sub { $application = shift; },
               "print_only!"    => $print_only,
               "path=s"         => \$path,
               "A=i"            => \$opt_A,
               "B=s"            => \$opt_B,
               "C=s"            => \$opt_C,
               "E=i"            => \$opt_E,
               "F=s"            => \$opt_F,
               "G=i"            => \$opt_G,
               "H=i"            => \$opt_H,
               "I:s"            => \$opt_I,
               "J:s"            => \$opt_J,
               "K=i"            => \$opt_K,
               "L=i"            => \$opt_L,
               "M=s"            => \$opt_M,
               "N=f"            => \$opt_N,
               "O=s"            => \$opt_O,
               "P=i"            => \$opt_P,
               "Q=s"            => \$opt_Q,
               "R=s"            => \$opt_R,
               "S=i"            => \$opt_S,
               "T:s"            => \$opt_T,
               "U:s"            => \$opt_U,
               "W=i"            => \$opt_W,
               "X=i"            => \$opt_X,
               "Y=f"            => \$opt_Y,
               "Z=i"            => \$opt_Z,
               "a=i"            => \$opt_a,
               "b=i"            => \$opt_b,
               "c=i"            => \$opt_c,
               "d=s"            => \$opt_d,
               "e=f"            => \$opt_e,
               "f=i"            => \$opt_f,
               "h=f"            => \$opt_h,
               "i=s"            => \$opt_i,
               "j=i"            => \$opt_j,
               "k=s"            => \$opt_k,
               "l=s"            => \$opt_l,
               "m=i"            => \$opt_m,
               "o=s"            => \$opt_o,
               "p=s"            => \$opt_p,
               "q=i"            => \$opt_q,
               "s:s"            => \$opt_s,
               "t=s"            => \$opt_t,
               "u=i"            => \$opt_u,
               "v=i"            => \$opt_v,
               "y=f"            => \$opt_y,
               "z=f"            => \$opt_z
               );
    my $retval = $path . "/psiblast";
    $retval .= &add_exe_extension();

    my $query_is_protein = "1";

    # Only the blastpgp and patseedp programs can be converted
    if (defined $opt_p and $opt_p ne "blastpgp" and $opt_p ne "patseedp") {
        die "Program '$opt_p' not implemented\n";
    }

    $retval .= &create_db_argument($opt_d)      if (defined $opt_d);
    $retval .= "-query $opt_i "                 if (defined $opt_i);
    $retval .= "-gilist $opt_l "                if (defined $opt_l);
    $retval .= "-gap_trigger $opt_N "           if (defined $opt_N);
    $retval .= "-matrix $opt_M "                if (defined $opt_M);
    $retval .= "-num_iterations $opt_j "        if (defined $opt_j);
    $retval .= "-min_word_score $opt_f "        if (defined $opt_f);
    $retval .= "-evalue $opt_e "                if (defined $opt_e);
    $retval .= "-gapopen $opt_G "               if (defined $opt_G);
    $retval .= "-gapextend $opt_E "             if (defined $opt_E);
    $retval .= "-num_threads $opt_a "           if (defined $opt_a);
    $retval .= "-dbsize $opt_z "                if (defined $opt_z);
    if (defined $opt_Y) {
        $retval .= "-searchsp " . &convert_float_to_int($opt_Y) . " ";
    }
    $retval .= "-pseudocount $opt_c "           if (defined $opt_c);
    $retval .= "-inclusion_ethresh $opt_h "     if (defined $opt_h);
    if (defined $opt_A) {
        if (defined $opt_P and $opt_P ne "0") {
            print STDERR "Warning: ignoring -P because window size is set\n";
        }
        $retval .= "-window_size $opt_A "
    }
    if (defined $opt_P and $opt_P eq "1" and (not defined $opt_A)) {
        $retval .= "-window_size 0 ";
    }
    $retval .= "-word_size $opt_W "             if (defined $opt_W);
    $retval .= "-xdrop_ungap $opt_y "           if (defined $opt_y);
    $retval .= "-xdrop_gap $opt_X "             if (defined $opt_X);
    $retval .= "-xdrop_gap_final $opt_Z "       if (defined $opt_Z);
    $retval .= "-num_descriptions $opt_v "      if (defined $opt_v);
    $retval .= "-num_alignments $opt_b "        if (defined $opt_b);
    $retval .= "-culling_limit $opt_K "         if (defined $opt_K);
    $retval .= "-comp_based_stats $opt_t "      if (defined $opt_t);
    $retval .= "-phi_pattern $opt_k "           if (defined $opt_k);
    $retval .= "-out $opt_o "                   if (defined $opt_o);
    $retval .= "-out_ascii_pssm $opt_Q "        if (defined $opt_Q);
    $retval .= "-in_msa $opt_B "                if (defined $opt_B);

    if (defined $opt_m) {
        if ($opt_m == 5 or $opt_m == 6) {
            print STDERR "Warning: -m5 or -m6 formatting options ";
            print STDERR "are not supported!\n";
        }
        # -m5 and -m6 have no equivalent, so later formats shift down by 2
        $opt_m -= 2 if ($opt_m >= 7);
        $retval .= "-outfmt $opt_m "
    }
    # -O (ASN.1 output file) overrides any output file/format set above
    if (defined $opt_O) {
        unless ($retval =~ s/-out \S+ /-out $opt_O /) {
            $retval .= "-out $opt_O ";
        }
        if ($retval =~ s/-outfmt \d+/-outfmt 8/) {
            print STDERR "Warning: overriding output format\n";
        } else {
            $retval .= "-outfmt 8 ";
        }
    }
    if (defined $opt_T and (length($opt_T) == 0 or $opt_T =~ /t/i)) {
        $retval .= "-html "
    }
    if (defined $opt_I and (length($opt_I) == 0 or $opt_I =~ /t/i)) {
        $retval .= "-show_gis ";
    }
    if (defined $opt_J and (length($opt_J) == 0 or $opt_J =~ /t/i)) {
        $retval .= "-parse_deflines ";
    }
    if (defined $opt_s and (length($opt_s) == 0 or $opt_s =~ /t/i)) {
        $retval .= "-use_sw_tback ";
    }
    if (defined $opt_U and (length($opt_U) == 0 or $opt_U =~ /t/i)) {
        $retval .= "-lcase_masking ";
    }
    if (defined $opt_F) {
        $retval .= &convert_filter_string($opt_F,
                                          ($query_is_protein eq "1")
                                          ? "blastp" : "blastn");
    }

    # Build the query location out of its start (-S) and stop (-H) values
    my $location = "";
    $location .= $opt_S if (defined $opt_S);
    if (defined $opt_H) {
        if ($location eq "") {
            $location = "0,$opt_H";
        } else {
            $location .= ",$opt_H";
        }
    }
    if ($location ne "") {
        $location .= ",-1" unless (defined $opt_H);
        $retval .= &convert_sequence_locations($location, "query");
    }

    # Checkpoint file recovery
    if (defined $opt_R) {
        if (defined $opt_q and $opt_q ne "0") {
            $retval .= "-in_pssm $opt_R "
        } else {
            die "ERROR: recovery from C toolkit checkpoint " .
                "file format not supported\n";
        }
    }

    # Checkpoint file saving: only the ASN.1 encodings (-u other than 0) can
    # be converted
    if (defined $opt_C) {
        if (defined $opt_u and $opt_u ne "0") {
            $retval .= "-out_pssm $opt_C "
        } else {
            die "ERROR: saving PSSM to C toolkit checkpoint " .
                "file format not supported\n";
        }
    }

    return $retval;
}

# Writes the string provided into a newly created temporary file, which is
# scheduled for deletion when the script exits.
# Returns the name of the temporary file; dies if it cannot be written.
sub write_bl2seq_temp_file($)
{
    my $contents = shift;
    # tempfile creates and opens the file atomically
    my ($fh, $fname) = File::Temp::tempfile();
    # Register the file before writing so it is removed even on failure
    push @files2delete, $fname;
    print $fh $contents or die "Failed to write to $fname: $!\n";
    close($fh) or die "Failed to close $fname: $!\n";
    return $fname;
}

# Handle the conversion from bl2seq arguments to the corresponding C++
# binaries.
# Argument: reference to the print_only flag, set via --print_only.
# Returns the converted command line; dies if -i, -j or -p are missing.
# Tested: all conversions should work
sub handle_bl2seq
{
    use File::Temp qw(:POSIX);  # loads File::Temp for the temporary files

    my $print_only = shift;
    my $path = DEFAULT_PATH;
    my ($opt_A, $opt_D, $opt_E, $opt_F, $opt_G, $opt_I, $opt_J, $opt_M,
        $opt_S, $opt_T, $opt_U, $opt_V, $opt_W, $opt_X, $opt_Y, $opt_a,
        $opt_d, $opt_e, $opt_g, $opt_i, $opt_j, $opt_m, $opt_o, $opt_p,
        $opt_q, $opt_r, $opt_t);

    GetOptions("<>"             => sub { $application = shift; },
               "print_only!"    => $print_only,
               "path=s"         => \$path,
               "A:s"            => \$opt_A,
               "D=i"            => \$opt_D,
               "E=i"            => \$opt_E,
               "F=s"            => \$opt_F,
               "G=i"            => \$opt_G,
               "I=s"            => \$opt_I,
               "J=s"            => \$opt_J,
               "M=s"            => \$opt_M,
               "S=i"            => \$opt_S,
               "T:s"            => \$opt_T,
               "U:s"            => \$opt_U,
               "V:s"            => \$opt_V, # not handled, not applicable
               "W=i"            => \$opt_W,
               "X=i"            => \$opt_X,
               "Y=f"            => \$opt_Y,
               "a=s"            => \$opt_a,
               "d=f"            => \$opt_d,
               "e=f"            => \$opt_e,
               "g:s"            => \$opt_g,
               "i=s"            => \$opt_i,
               "j=s"            => \$opt_j,
               "m:s"            => \$opt_m,
               "o=s"            => \$opt_o,
               "p=s"            => \$opt_p,
               "q=i"            => \$opt_q,
               "r=i"            => \$opt_r,
               "t=i"            => \$opt_t
               );
    my $retval = $path;

    unless (defined $opt_i and defined $opt_j) {
        die "-i and -j are required in bl2seq\n";
    }

    if (defined $opt_p) {
        $retval .= "/$opt_p";
        $retval .= &add_exe_extension();
    } else {
        die "Program must be specified via the -p option\n";
    }
    if (defined $opt_A) {
        # The -A option is not supported, so we create temporary files to
        # simulate it (example input: bl2seq -i129295 -j104501 -pblastp -A)
        my $query_fname = &write_bl2seq_temp_file("$opt_i");
        my $subj_fname = &write_bl2seq_temp_file("$opt_j");
        if (DEBUG) {
            print STDERR "Created temp. files $query_fname and $subj_fname\n";
        }

        $retval .= "-query $query_fname -subject $subj_fname ";
        if ($$print_only) {
            print STDERR "Warning: arguments to -query and -subject must be ";
            print STDERR "files containing the\narguments to bl2seq's -i and ";
            print STDERR "-j arguments respectively.\n";
        }
    } else {
        $retval .= "-query $opt_i "             if (defined $opt_i);
        $retval .= "-subject $opt_j "           if (defined $opt_j);
    }
    $retval .= "-out $opt_o "                   if (defined $opt_o);
    # -a (ASN.1 output file) overrides any output file/format set above
    if (defined $opt_a) {
        unless ($retval =~ s/-out \S+ /-out $opt_a /) {
            $retval .= "-out $opt_a ";
        }
        if ($retval =~ s/-outfmt \d+/-outfmt 8/) {
            print STDERR "Warning: overriding output format\n";
        } else {
            $retval .= "-outfmt 8 ";
        }
    }
    if (defined $opt_D and $opt_D =~ /1/) {
        if ($retval =~ s/-outfmt \d+/-outfmt 7/) {
            print STDERR "Warning: overriding output format\n";
        } else {
            $retval .= "-outfmt 7 ";
        }
    }
    if (defined $opt_T and (length($opt_T) == 0 or $opt_T =~ /t/i)) {
        $retval .= "-html "
    }
    $retval .= "-evalue $opt_e "                if (defined $opt_e);
    $retval .= "-gapopen $opt_G "               if (defined $opt_G);
    $retval .= "-gapextend $opt_E "             if (defined $opt_E);
    $retval .= "-word_size $opt_W "             if (defined $opt_W);
    $retval .= "-matrix $opt_M "                if (defined $opt_M);
    $retval .= "-penalty $opt_q "               if (defined $opt_q);
    $retval .= "-reward $opt_r "                if (defined $opt_r);
    $retval .= &convert_strand($opt_S)          if (defined $opt_S);
    $retval .= "-max_intron_length $opt_t "     if (defined $opt_t);
    $retval .= "-dbsize $opt_d "                if (defined $opt_d);
    $retval .= "-xdrop_gap $opt_X "             if (defined $opt_X);
    if (defined $opt_Y) {
        $retval .= "-searchsp " . &convert_float_to_int($opt_Y) . " ";
    }
    if (defined $opt_U and (length($opt_U) == 0 or $opt_U =~ /t/i)) {
        $retval .= "-lcase_masking ";
    }
    if (defined $opt_m and (length($opt_m) == 0 or $opt_m =~ /t/i)) {
        $retval .= "-task megablast ";
    }
    if (defined $opt_g and $opt_g =~ /f/i) {
        $retval .= "-ungapped ";
    }
    $retval .= &convert_sequence_locations($opt_I, "query") if ($opt_I);
    $retval .= &convert_sequence_locations($opt_J, "subject") if ($opt_J);

    if (defined $opt_F) {
        $retval .= &convert_filter_string($opt_F, $opt_p);
    }

    return $retval;
}

sub handle_rpsblast
{
    my $print_only = shift;
    my $path = DEFAULT_PATH;
    my ($opt_F, $opt_I, $opt_J, $opt_L, $opt_N, $opt_O, $opt_P, $opt_T,
        $opt_U, $opt_V, $opt_X, $opt_Y, $opt_Z, $opt_a, $opt_b, $opt_d,
        $opt_e, $opt_i, $opt_l, $opt_m, $opt_o, $opt_p, $opt_v, $opt_y,
        $opt_z);

    GetOptions("<>"             => sub { $application = shift; },
               "print_only!"    => $print_only,
               "path=s"         => \$path,
               "F=s"            => \$opt_F,
               "I:s"            => \$opt_I,
               "J:s"            => \$opt_J,
               "L=s"            => \$opt_L,
               "N=f"            => \$opt_N,
               "O=s"            => \$opt_O,
               "P=i"            => \$opt_P,
               "T:s"            => \$opt_T,
               "U:s"            => \$opt_U,
               "V=s"            => \$opt_V,
               "X=i"            => \$opt_X,
               "Y=f"            => \$opt_Y,
               "Z=i"            => \$opt_Z,
               "a=i"            => \$opt_a,
               "b=i"            => \$opt_b,
               "d=s"            => \$opt_d,
               "e=f"            => \$opt_e,
               "i=s"            => \$opt_i,
               "l=s"            => \$opt_l,
               "m=i"            => \$opt_m,
               "o=s"            => \$opt_o,
               "p:s"            => \$opt_p,
               "v=i"            => \$opt_v,
               "y=f"            => \$opt_y,
               "z=f"            => \$opt_z
               );
    my $retval = $path;

    if (defined $opt_p and $opt_p =~ /f/i) {
        $retval .= "/rpstblastn";
    } else {
        $retval .= "/rpsblast";
    }
    $retval .= &add_exe_extension();

    $retval .= "-query $opt_i "                 if (defined $opt_i);
    $retval .= &create_db_argument($opt_d)      if (defined $opt_d);
    $retval .= "-evalue $opt_e "                if (defined $opt_e);
$retval .= "-out $opt_o " if (defined $opt_o); 1075 $retval .= "-xdrop_ungap $opt_y " if (defined $opt_y); 1076 $retval .= "-xdrop_gap $opt_X " if (defined $opt_X); 1077 $retval .= "-min_raw_gapped_score $opt_N " if (defined $opt_N); 1078 $retval .= "-num_threads $opt_a " if (defined $opt_a); 1079 $retval .= "-num_descriptions $opt_v " if (defined $opt_v); 1080 $retval .= "-num_alignments $opt_b " if (defined $opt_b); 1081 $retval .= "-dbsize $opt_z " if (defined $opt_z); 1082 if (defined $opt_Y) { 1083 $retval .= "-searchsp " . &convert_float_to_int($opt_Y) . " "; 1084 } 1085 $retval .= "-xdrop_gap_final $opt_Z " if (defined $opt_Z); 1086 if (defined $opt_m) { 1087 if ($opt_m == 5 or $opt_m == 6) { 1088 print STDERR "Warning: -m5 or -m6 formatting options "; 1089 print STDERR "are not supported!\n"; 1090 } 1091 $opt_m -= 2 if ($opt_m >= 7); 1092 $retval .= "-outfmt $opt_m " 1093 } 1094 if (defined $opt_O) { 1095 unless ($retval =~ s/-out \S+ /-out $opt_O /) { 1096 $retval .= "-out $opt_O "; 1097 } 1098 unless ($retval =~ s/-outfmt \d+/-outfmt 8/) { 1099 $retval .= "-outfmt 8 "; 1100 } else { 1101 print STDERR "Warning: overriding output format\n"; 1102 } 1103 } 1104 if (defined $opt_T and (length($opt_T) == 0 or $opt_T =~ /t/i)) { 1105 $retval .= "-html " 1106 } 1107 if (defined $opt_P and $opt_P eq "1") { 1108 $retval .= "-window_size 0 "; 1109 } 1110 if (defined $opt_F) { 1111 $retval .= &convert_filter_string($opt_F, "blastp"); 1112 } 1113 if (defined $opt_I and (length($opt_I) == 0 or $opt_I =~ /t/i)) { 1114 $retval .= "-show_gis "; 1115 } 1116 if (defined $opt_J and (length($opt_J) == 0 or $opt_J =~ /t/i)) { 1117 $retval .= "-parse_deflines "; 1118 } 1119 if (defined $opt_U and (length($opt_U) == 0 or $opt_U =~ /t/i)) { 1120 $retval .= "-lcase_masking "; 1121 } 1122 $retval .= &convert_sequence_locations($opt_L, "query") if ($opt_L); 1123 1124 return $retval; 1125} 1126 1127sub handle_fastacmd 1128{ 1129 my $print_only = shift; 1130 my $path = DEFAULT_PATH; 
1131 my ($opt_d, $opt_p, $opt_s, $opt_i, $opt_a, $opt_l, $opt_t, $opt_o, 1132 $opt_c, $opt_D, $opt_L, $opt_S, $opt_T, $opt_I, $opt_P); 1133 1134 GetOptions("<>" => sub { $application = shift; }, 1135 "print_only!" => $print_only, 1136 "path=s" => \$path, 1137 "D=i" => \$opt_D, 1138 "I:s" => \$opt_I, 1139 "L=s" => \$opt_L, 1140 "P=i" => \$opt_P, 1141 "S=i" => \$opt_S, 1142 "T:s" => \$opt_T, 1143 "a:s" => \$opt_a, 1144 "c:s" => \$opt_c, 1145 "d=s" => \$opt_d, 1146 "i=s" => \$opt_i, 1147 "l=i" => \$opt_l, 1148 "o=s" => \$opt_o, 1149 "p=s" => \$opt_p, 1150 "s=s" => \$opt_s, 1151 "t:s" => \$opt_t 1152 ); 1153 1154 my $retval = $path . "/blastdbcmd"; 1155 $retval .= &add_exe_extension(); 1156 $retval .= &create_db_argument($opt_d) if (defined $opt_d); 1157 if (defined $opt_p) { 1158 $retval .= "-dbtype "; 1159 if ($opt_p =~ /p/i) { 1160 $retval .= "prot "; 1161 } elsif ($opt_p =~ /f/i) { 1162 $retval .= "nucl "; 1163 } else { 1164 $retval .= "guess "; 1165 } 1166 } 1167 $retval .= "-entry $opt_s " if (defined $opt_s); 1168 $retval .= "-entry_batch $opt_i " if (defined $opt_i); 1169 $retval .= "-line_length $opt_l " if (defined $opt_l); 1170 $retval .= "-out $opt_o " if (defined $opt_o); 1171 $retval .= "-pig $opt_P " if (defined $opt_P); 1172 if (defined $opt_D) { 1173 $retval .= "-entry all -outfmt "; 1174 if ($opt_D eq '1') { 1175 $retval .= "\%f "; 1176 } elsif ($opt_D eq '2') { 1177 $retval .= "\%g "; 1178 } elsif ($opt_D eq '3') { 1179 $retval .= "\%a "; 1180 } else { 1181 die "Invalid argument to -D\n"; 1182 } 1183 } 1184 $retval .= &convert_sequence_locations($opt_L, "range") if ($opt_L); 1185 $retval .= &convert_strand($opt_S) if (defined $opt_S); 1186 if (defined $opt_T) { 1187 #print STDERR "Warning: -T option is not supported, please use " . 1188 # "the -outfmt option to blastdbcmd with \%T, \%L, or \%S as an " . 
1189 # "argument\n"; 1190 $retval .= "-outfmt \"NCBI Taxonomy id: \%T; Common name: \%L; "; 1191 $retval .= "Scientific name: \%S\" "; 1192 } 1193 if (defined $opt_I and (length($opt_I) == 0 or $opt_I =~ /t/i)) { 1194 $retval .= "-info "; 1195 } 1196 if (defined $opt_a and (length($opt_a) == 0 or $opt_a =~ /t/i)) { 1197 $retval .= "-get_dups "; 1198 } 1199 if (defined $opt_t and (length($opt_t) == 0 or $opt_t =~ /t/i)) { 1200 $retval .= "-target_only "; 1201 } 1202 if (defined $opt_c and (length($opt_c) == 0 or $opt_c =~ /t/i)) { 1203 $retval .= "-ctrl_a "; 1204 } 1205 return $retval; 1206} 1207 1208sub handle_formatdb 1209{ 1210 my $print_only = shift; 1211 my $path = DEFAULT_PATH; 1212 my ($opt_B, $opt_F, $opt_L, $opt_T, $opt_V, $opt_a, $opt_b, $opt_e, $opt_i, 1213 $opt_l, $opt_n, $opt_o, $opt_p, $opt_s, $opt_t, $opt_v); 1214 1215 GetOptions("<>" => sub { $application = shift; }, 1216 "print_only!" => $print_only, 1217 "path=s" => \$path, 1218 "B=s" => \$opt_B, 1219 "F=s" => \$opt_F, 1220 "L=s" => \$opt_L, 1221 "T=s" => \$opt_T, 1222 "V:s" => \$opt_V, 1223 "a:s" => \$opt_a, 1224 "b:s" => \$opt_b, 1225 "e:s" => \$opt_e, 1226 "i=s" => \$opt_i, 1227 "l=s" => \$opt_l, 1228 "n=s" => \$opt_n, 1229 "o:s" => \$opt_o, 1230 "p:s" => \$opt_p, 1231 "s:s" => \$opt_s, 1232 "t=s" => \$opt_t, 1233 "v=i" => \$opt_v 1234 ); 1235 1236 my $retval = $path; 1237 if (defined $opt_L) { 1238 $retval .= "/blastdb_aliastool"; 1239 die "-i is required\n" unless (defined $opt_i); 1240 die "-F is required\n" unless (defined $opt_F); 1241 } else { 1242 $retval .= "/makeblastdb"; 1243 } 1244 $retval .= &add_exe_extension(); 1245 1246 if (defined $opt_B) { 1247 die "-F option must be specified with -B\n" unless (defined $opt_F); 1248 $retval = $path . 
"/blastdb_aliastool"; 1249 $retval .= &add_exe_extension(); 1250 $retval .= "-gi_file_in $opt_F -gi_file_out $opt_B"; 1251 return $retval; 1252 } 1253 1254 $retval .= "-title \"$opt_t\" " if (defined $opt_t); 1255 if (defined $opt_p) { 1256 $retval .= "-dbtype "; 1257 if ((length($opt_p) == 0 or $opt_p =~ /t/i)) { 1258 $retval .= "prot "; 1259 } else { 1260 $retval .= "nucl "; 1261 } 1262 } 1263 if ($retval =~ /blastdb_aliastool/) { 1264 $retval .= "-out $opt_L " if (defined $opt_L); 1265 if (defined $opt_i and not defined $opt_n) { 1266 $retval .= &create_db_argument($opt_i); 1267 } 1268 # there's no -n in blastdb_aliastool, as we copy the argument value 1269 # verbatim into the DBLIST field of the alias file, so we make 1270 # formatdb's -n option tool override -i 1271 $retval .= &create_db_argument($opt_n) if (defined $opt_n); 1272 } else { 1273 $retval .= "-out $opt_n " if (defined $opt_n); 1274 $retval .= "-in $opt_i " if (defined $opt_i); 1275 } 1276 $retval .= "-gilist $opt_F " if (defined $opt_F); 1277 $retval .= "-logfile $opt_l " if (defined $opt_l); 1278 $retval .= "-taxid-map $opt_T " if (defined $opt_T); 1279 1280 if (defined $opt_o and (length($opt_o) == 0 or $opt_o =~ /t/i)) { 1281 $retval .= "-parse_seqids "; 1282 } 1283 if (defined $opt_a) { 1284 print STDERR "Warning: -a option is not supported\n"; 1285 } 1286 if (defined $opt_b) { 1287 print STDERR "Warning: -b option is not supported\n"; 1288 } 1289 if (defined $opt_e) { 1290 print STDERR "Warning: -e option is not supported\n"; 1291 } 1292 if (defined $opt_s) { 1293 print STDERR "Warning: -s option is not supported\n"; 1294 } 1295 if (defined $opt_V) { 1296 print STDERR "Warning: -V option is not supported\n"; 1297 } 1298 if (defined $opt_v) { 1299 print STDERR "Warning: -v option is not supported, please use " . 
1300 "the -max_file_sz option to makeblastdb\n"; 1301 } 1302 return $retval; 1303} 1304__END__ 1305 1306=head1 NAME 1307 1308B<legacy_blast.pl> - Convert BLAST command line invocations from NCBI C 1309toolkit's implementation to NCBI C++ toolkit's implementation. 1310 1311=head1 SYNOPSIS 1312 1313legacy_blast.pl <C toolkit command line program and arguments> [--print_only] 1314[--path /path/to/binaries] 1315legacy_blast.pl [--version] 1316legacy_blast.pl [--help] 1317 1318=head1 OPTIONS 1319 1320=over 2 1321 1322=item B<--path> 1323 1324Use the provided path as the location of the BLAST binaries to execute/print 1325(default: /usr/bin). 1326 1327=item B<--print_only> 1328 1329Print the equivalent command line option instead of running the command 1330(default: false). 1331 1332=item B<--version> 1333 1334Prints this script's version. Must be invoked as the first and only argument to 1335this script. 1336 1337=back 1338 1339=head1 DESCRIPTION 1340 1341This script converts and runs the equivalent NCBI C toolkit command line BLAST 1342program and arguments provided to it (whenever possible) to NCBI C++ tookit 1343BLAST programs. Note that to specify options to this script they B<MUST> use 2 1344dashes to prefix the options B<AND> be listed at the end of the command line 1345invocation to convert. 1346 1347=head1 EXIT CODES 1348 1349This script returns 0 on success and a non-zero value on errors. 1350 1351=head1 BUGS 1352 1353Please report them to <blast-help@ncbi.nlm.nih.gov> 1354 1355=head1 COPYRIGHT 1356 1357See PUBLIC DOMAIN NOTICE included at the top of this script. 1358 1359=cut 1360