1 /* $Id: blast_args.hpp 631554 2021-05-19 13:52:23Z ivanov $ 2 * =========================================================================== 3 * 4 * PUBLIC DOMAIN NOTICE 5 * National Center for Biotechnology Information 6 * 7 * This software/database is a "United States Government Work" under the 8 * terms of the United States Copyright Act. It was written as part of 9 * the author's official duties as a United States Government employee and 10 * thus cannot be copyrighted. This software/database is freely available 11 * to the public for use. The National Library of Medicine and the U.S. 12 * Government have not placed any restriction on its use or reproduction. 13 * 14 * Although all reasonable efforts have been taken to ensure the accuracy 15 * and reliability of the software and data, the NLM and the U.S. 16 * Government do not and cannot warrant the performance or results that 17 * may be obtained by using this software or data. The NLM and the U.S. 18 * Government disclaim all warranties, express or implied, including 19 * warranties of performance, merchantability or fitness for any particular 20 * purpose. 21 * 22 * Please cite the author in any work or product based on this material. 
23 * 24 * =========================================================================== 25 * 26 * Author: Jason Papadopoulos 27 * 28 */ 29 30 /** @file blast_args.hpp 31 * Interface for converting blast-related command line 32 * arguments into blast options 33 */ 34 35 #ifndef ALGO_BLAST_BLASTINPUT___BLAST_ARGS__HPP 36 #define ALGO_BLAST_BLASTINPUT___BLAST_ARGS__HPP 37 38 #include <corelib/ncbistd.hpp> 39 #include <corelib/ncbiargs.hpp> 40 #include <algo/blast/api/uniform_search.hpp> 41 #include <algo/blast/api/blast_options.hpp> 42 #include <algo/blast/api/blast_options_handle.hpp> 43 #include <algo/blast/igblast/igblast.hpp> 44 #include <algo/blast/api/setup_factory.hpp> // for CThreadable 45 #include <algo/blast/blastinput/cmdline_flags.hpp> 46 #include <algo/blast/blastinput/blast_input_aux.hpp> 47 48 #include <objmgr/scope.hpp> // for CScope 49 #include <objects/seqloc/Na_strand.hpp> 50 #include <objects/scoremat/PssmWithParameters.hpp> 51 52 #include <util/compress/stream_util.hpp> 53 54 BEGIN_NCBI_SCOPE 55 BEGIN_SCOPE(blast) 56 57 /** 58 * BLAST Command line arguments design 59 * The idea is to have several small objects (subclasses of IBlastCmdLineArgs) 60 * which can do two things: 61 * 1) On creation, add flags/options/etc to a CArgs object 62 * 2) When passed in a CBlastOptions object, call the appropriate methods based 63 * on the CArgs options set when the NCBI application framework parsed the 64 * command line. If data collected by the small object (from the command line) 65 * cannot be applied to the CBlastOptions object, then it's provided to the 66 * application via some other interface methods. 67 * 68 * Each command line application will have its own argument class (e.g.: 69 * CPsiBlastAppArgs), which will contain several of the aformentioned small 70 * objects. 
It will create and hold a reference to a CArgs class as well as
 * a CBlastOptionsHandle object, which will pass to each of its small objects
 * aggregated as data members and then return it to the caller (application)
 *
 * Categories of data to extract from command line options
 * 1) BLAST algorithm options
 * 2) Input/Output files, and their modifiers (e.g.: believe query defline)
 * 3) BLAST database information (names, limitations, num db seqs)
 * 4) Formatting options (html, display formats, etc)
 */

/** Interface definition for a generic command line option for BLAST.
 *
 * Subclasses must implement SetArgumentDescriptions(); overriding
 * ExtractAlgorithmOptions() is optional.
 */
class NCBI_BLASTINPUT_EXPORT IBlastCmdLineArgs : public CObject
{
public:
    /** Our virtual destructor */
    virtual ~IBlastCmdLineArgs() {}

    /** Sets the command line descriptions in the CArgDescriptions object
     * relevant to the subclass
     * @param arg_desc the argument descriptions object [in|out]
     */
    virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc) = 0;

    /** Extracts BLAST algorithmic options from the command line arguments into
     * the CBlastOptions object. Default implementation does nothing.
     * @param cmd_line_args Command line arguments parsed by the NCBI
     * application framework [in]
     * @param options object to which the appropriate options will be set
     * [in|out]
     */
    virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
                                         CBlastOptions& options);
};

/** Argument class to retrieve input and output streams for a command line
 * program.
108 */ 109 class NCBI_BLASTINPUT_EXPORT CStdCmdLineArgs : public IBlastCmdLineArgs 110 { 111 public: 112 /** Default constructor */ CStdCmdLineArgs()113 CStdCmdLineArgs() : m_InputStream(0), m_OutputStream(0), 114 m_GzipEnabled(false), 115 m_SRAaccessionEnabled(false), 116 m_UnalignedOutputStream(0) {}; 117 /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */ 118 virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc); 119 /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */ 120 virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args, 121 CBlastOptions& options); 122 /** Get the input stream for a command line application */ 123 CNcbiIstream& GetInputStream() const; 124 /** Get the output stream for a command line application */ 125 CNcbiOstream& GetOutputStream() const; 126 /** Set the input stream if read from a saved search strategy */ 127 void SetInputStream(CRef<CTmpFile> input_file); 128 129 /** Set automatic decompression of the input file is file name is 130 * recognized 131 * @param g If true input file will be unzgipped if the file name ends with 132 * ".gz" [in] 133 */ SetGzipEnabled(bool g)134 void SetGzipEnabled(bool g) {m_GzipEnabled = g;} 135 136 /** enables sra accession flag 137 * @param g If true "-sra" will be added (not compatible with "-query") 138 */ SetSRAaccessionEnabled(bool g)139 void SetSRAaccessionEnabled(bool g) {m_SRAaccessionEnabled = g;} 140 141 /** Is there a separate output stream for unaligned sequences/reads 142 * (for magicblast) 143 * @return True if separate output stream has been set up, otherwise false 144 */ HasUnalignedOutputStream(void) const145 bool HasUnalignedOutputStream(void) const {return m_UnalignedOutputStream;} 146 147 /** Get output stream for unaligned sequences/reads (for magicblast) 148 * @return Output stream for unaligned reads or NULL 149 */ GetUnalignedOutputStream() const150 CNcbiOstream* GetUnalignedOutputStream() const 151 {return 
m_UnalignedOutputStream;} 152 153 private: 154 CNcbiIstream* m_InputStream; ///< Application's input stream 155 CNcbiOstream* m_OutputStream; ///< Application's output stream 156 auto_ptr<CDecompressIStream> m_DecompressIStream; 157 auto_ptr<CCompressOStream> m_CompressOStream; 158 159 /// ASN.1 specification of query sequences when read from a saved search 160 /// strategy 161 CRef<CTmpFile> m_QueryTmpInputFile; 162 163 /// If true input file will be decompressed with gzip if filename ends 164 /// with ".gz" 165 bool m_GzipEnabled; 166 167 /// If true, option to specify SRA runs will be presented as possible 168 /// query input 169 bool m_SRAaccessionEnabled; 170 171 /// Output stream to report unaligned sequences/reads 172 CNcbiOstream* m_UnalignedOutputStream; 173 unique_ptr<CCompressOStream> m_UnalignedCompressOStream; 174 }; 175 176 /** Argument class to populate an application's name and description */ 177 class NCBI_BLASTINPUT_EXPORT CProgramDescriptionArgs : public IBlastCmdLineArgs 178 { 179 public: 180 /** 181 * @brief Constructor 182 * 183 * @param program_name application's name [in] 184 * @param program_description application's description [in] 185 */ 186 CProgramDescriptionArgs(const string& program_name, 187 const string& program_description); 188 /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */ 189 virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc); 190 191 protected: 192 string m_ProgName; ///< Application's name 193 string m_ProgDesc; ///< Application's description 194 }; 195 196 /// Argument class to specify the supported tasks a given program 197 class NCBI_BLASTINPUT_EXPORT CTaskCmdLineArgs : public IBlastCmdLineArgs 198 { 199 public: 200 /** Constructor 201 * @param supported_tasks list of supported tasks [in] 202 * @param default_task One of the tasks above, to be displayed as 203 * default in the command line arguments (cannot be empty or absent from 204 * the set above) [in] 205 */ 206 
CTaskCmdLineArgs(const set<string>& supported_tasks, 207 const string& default_task); 208 /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */ 209 virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc); 210 /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */ 211 virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args, 212 CBlastOptions& options); 213 private: 214 /// Set of supported tasks by this command line argument 215 const set<string> m_SupportedTasks; 216 /// Default task for this command line argument 217 string m_DefaultTask; 218 }; 219 220 /** Argument class to retrieve and set the window size BLAST algorithm 221 * option */ 222 class NCBI_BLASTINPUT_EXPORT CWindowSizeArg : public IBlastCmdLineArgs 223 { 224 public: 225 /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */ 226 virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc); 227 /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions 228 * @note this depends on the matrix already being set... 229 */ 230 virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args, 231 CBlastOptions& options); 232 }; 233 234 /** Argument class to retrieve and set the off-diagonal range used in 2-hit 235 algorithm */ 236 class NCBI_BLASTINPUT_EXPORT COffDiagonalRangeArg : public IBlastCmdLineArgs 237 { 238 public: 239 /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */ 240 virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc); 241 /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions 242 * @note this depends on the matrix already being set... 
243 */ 244 virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args, 245 CBlastOptions& options); 246 }; 247 248 /** Argument class to retrieve and set the word threshold BLAST algorithm 249 * option */ 250 class NCBI_BLASTINPUT_EXPORT CWordThresholdArg : public IBlastCmdLineArgs 251 { 252 public: 253 /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */ 254 virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc); 255 /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions 256 * @note this depends on the matrix already being set... 257 */ 258 virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args, 259 CBlastOptions& options); 260 }; 261 262 /** RMH: Argument class to retrieve and set the options specific to 263 * the RMBlastN algorithm 264 */ 265 class NCBI_BLASTINPUT_EXPORT CRMBlastNArg : public IBlastCmdLineArgs 266 { 267 public: 268 /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */ 269 virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc); 270 /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */ 271 virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args, 272 CBlastOptions& options); 273 }; 274 275 /** Argument class to retrieve and set the scoring matrix name BLAST algorithm 276 * option */ 277 class NCBI_BLASTINPUT_EXPORT CMatrixNameArg : public IBlastCmdLineArgs 278 { 279 public: 280 /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */ 281 virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc); 282 /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */ 283 virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args, 284 CBlastOptions& options); 285 }; 286 287 /** Argument class for general search BLAST algorithm options: evalue, gap 288 * penalties, query filter string, ungapped x-drop, initial and final gapped 289 * x-drop, word size, percent identity, and effective search space. 
290 */ 291 class NCBI_BLASTINPUT_EXPORT CGenericSearchArgs : public IBlastCmdLineArgs 292 { 293 public: 294 /** 295 * @brief Constructor 296 * 297 * @param query_is_protein is the query sequence(s) protein? [in] 298 * @param is_rpsblast is it RPS-BLAST? [in] 299 * @param show_perc_identity should the percent identity be shown? 300 * @param is_igblast is it IG-BLAST? [in] 301 * Currently only supported for blastn [in] 302 */ CGenericSearchArgs(bool query_is_protein=true,bool is_rpsblast=false,bool show_perc_identity=false,bool is_tblastx=false,bool is_igblast=false,bool suppress_sum_stats=false)303 CGenericSearchArgs(bool query_is_protein = true, bool is_rpsblast = false, 304 bool show_perc_identity = false, bool is_tblastx = false, 305 bool is_igblast = false, bool suppress_sum_stats = false) 306 : m_QueryIsProtein(query_is_protein), m_IsRpsBlast(is_rpsblast), 307 m_ShowPercentIdentity(show_perc_identity), m_IsTblastx(is_tblastx), 308 m_IsIgBlast(is_igblast), m_SuppressSumStats(suppress_sum_stats) {} 309 310 /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */ 311 virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc); 312 /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */ 313 virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args, 314 CBlastOptions& options); 315 private: 316 bool m_QueryIsProtein; /**< true if the query is protein */ 317 bool m_IsRpsBlast; /**< true if the search is RPS-BLAST */ 318 bool m_ShowPercentIdentity; /**< true if the percent identity option should 319 be shown */ 320 bool m_IsTblastx; /**< true if the search is tblastx */ 321 bool m_IsIgBlast; /**< true if the search is igblast */ 322 bool m_SuppressSumStats; /**< true if search is blastn or blastp */ 323 }; 324 325 /** Argument class for collecting filtering options */ 326 class NCBI_BLASTINPUT_EXPORT CFilteringArgs : public IBlastCmdLineArgs 327 { 328 public: 329 /** 330 * @brief Constructor 331 * 332 * @param query_is_protein 
is the query sequence(s) protein? [in] 333 * @param filter_by_default should filtering be applied by default? [in] 334 */ CFilteringArgs(bool query_is_protein=true,bool filter_by_default=true)335 CFilteringArgs(bool query_is_protein = true, 336 bool filter_by_default = true) 337 : m_QueryIsProtein(query_is_protein), 338 m_FilterByDefault(filter_by_default) {} 339 340 /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */ 341 virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc); 342 /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */ 343 virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args, 344 CBlastOptions& options); 345 private: 346 bool m_QueryIsProtein; /**< true if the query is protein */ 347 bool m_FilterByDefault; /**< Should filtering be applied by default? */ 348 349 /** 350 * @brief Auxiliary method to tokenize the filtering string. 351 * 352 * @param filtering_args string to tokenize [in] 353 * @param output vector with tokens [in|out] 354 */ 355 void x_TokenizeFilteringArgs(const string& filtering_args, 356 vector<string>& output) const; 357 }; 358 359 /// Defines values for match and mismatch in nucleotide comparisons as well as 360 /// non-greedy extension 361 class NCBI_BLASTINPUT_EXPORT CNuclArgs : public IBlastCmdLineArgs 362 { 363 public: 364 /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */ 365 virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc); 366 /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */ 367 virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args, 368 CBlastOptions& options); 369 }; 370 371 /// Argument class to retrieve discontiguous megablast arguments 372 class NCBI_BLASTINPUT_EXPORT CDiscontiguousMegablastArgs : public IBlastCmdLineArgs 373 { 374 public: 375 /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */ 376 virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc); 377 /** 
Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */ 378 virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args, 379 CBlastOptions& options); 380 381 /// Value to specify coding template type 382 static const string kTemplType_Coding; 383 /// Value to specify optimal template type 384 static const string kTemplType_Optimal; 385 /// Value to specify coding+optimal template type 386 static const string kTemplType_CodingAndOptimal; 387 }; 388 389 /** Argument class for collecting composition based statistics options */ 390 class NCBI_BLASTINPUT_EXPORT CCompositionBasedStatsArgs : public IBlastCmdLineArgs 391 { 392 public: 393 /// Constructor 394 ///@param is_2and3supported Are composition based statistics options 2 and 395 /// 3 supported [in] 396 ///@param default_option Default composition based satatistics option [in] 397 ///@param zero_option_descr Non-standard description for composition 398 /// based statistics option zero [in] CCompositionBasedStatsArgs(bool is_2and3supported=true,const string & default_option=kDfltArgCompBasedStats,const string & zero_option_descr="")399 CCompositionBasedStatsArgs(bool is_2and3supported = true, 400 const string& default_option 401 = kDfltArgCompBasedStats, 402 const string& zero_option_descr = "") 403 : m_Is2and3Supported(is_2and3supported), 404 m_DefaultOpt(default_option), 405 m_ZeroOptDescr(zero_option_descr) {} 406 407 /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */ 408 virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc); 409 /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */ 410 virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args, 411 CBlastOptions& options); 412 413 protected: 414 /// Are options 2 and 3 supported 415 bool m_Is2and3Supported; 416 /// Default option 417 string m_DefaultOpt; 418 /// Non standard description for option zero 419 string m_ZeroOptDescr; 420 }; 421 422 /** Argument class for collecting gapped options 
*/ 423 class NCBI_BLASTINPUT_EXPORT CGappedArgs : public IBlastCmdLineArgs 424 { 425 public: 426 /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */ 427 virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc); 428 /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */ 429 virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args, 430 CBlastOptions& options); 431 }; 432 433 /** Argument class for collecting the largest intron size */ 434 class NCBI_BLASTINPUT_EXPORT CLargestIntronSizeArgs : public IBlastCmdLineArgs 435 { 436 public: 437 /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */ 438 virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc); 439 /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */ 440 virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args, 441 CBlastOptions& options); 442 }; 443 444 /// Argument class to collect the frame shift penalty for out-of-frame searches 445 class NCBI_BLASTINPUT_EXPORT CFrameShiftArgs : public IBlastCmdLineArgs 446 { 447 public: 448 /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */ 449 virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc); 450 /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */ 451 virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args, 452 CBlastOptions& options); 453 }; 454 455 /// Argument class to collect the genetic code for all queries/subjects 456 class NCBI_BLASTINPUT_EXPORT CGeneticCodeArgs : public IBlastCmdLineArgs 457 { 458 public: 459 /// Enumeration defining which sequences the genetic code applies to 460 enum ETarget { 461 eQuery, ///< Query genetic code 462 eDatabase ///< Database genetic code 463 }; 464 465 466 /** 467 * @brief Constructor 468 * 469 * @param t genetic code target (query or database) 470 */ CGeneticCodeArgs(ETarget t)471 CGeneticCodeArgs(ETarget t) : m_Target(t) {}; 472 473 /** Interface method, \sa 
IBlastCmdLineArgs::SetArgumentDescriptions */ 474 virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc); 475 /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */ 476 virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args, 477 CBlastOptions& options); 478 479 private: 480 ETarget m_Target; ///< Genetic code target 481 }; 482 483 /// Argument class to retrieve the gap trigger option 484 class NCBI_BLASTINPUT_EXPORT CGapTriggerArgs : public IBlastCmdLineArgs 485 { 486 public: 487 /** 488 * @brief Constructor 489 * 490 * @param query_is_protein is the query sequence(s) protein? 491 */ CGapTriggerArgs(bool query_is_protein)492 CGapTriggerArgs(bool query_is_protein) 493 : m_QueryIsProtein(query_is_protein) {} 494 /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */ 495 virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc); 496 /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */ 497 virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args, 498 CBlastOptions& options); 499 private: 500 bool m_QueryIsProtein; /**< true if the query is protein */ 501 }; 502 503 /// Argument class to collect PSSM engine options 504 class NCBI_BLASTINPUT_EXPORT CPssmEngineArgs : public IBlastCmdLineArgs 505 { 506 public: 507 /// Constructor 508 /// @param is_deltablast Are the aruments set up for Delta Blast [in] CPssmEngineArgs(bool is_deltablast=false)509 CPssmEngineArgs(bool is_deltablast = false) : m_IsDeltaBlast(is_deltablast) 510 {} 511 512 /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */ 513 virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc); 514 /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */ 515 virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args, 516 CBlastOptions& options); 517 518 private: 519 /// Are these arumnets for Delta Blast 520 bool m_IsDeltaBlast; 521 }; 522 523 /// Argument class to import/export the 
search strategy 524 class NCBI_BLASTINPUT_EXPORT CSearchStrategyArgs : public IBlastCmdLineArgs 525 { 526 public: 527 /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */ 528 virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc); 529 /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */ 530 virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args, 531 CBlastOptions& options); 532 533 /// Get the input stream for the search strategy 534 CNcbiIstream* GetImportStream(const CArgs& args) const; 535 /// Get the output stream for the search strategy 536 CNcbiOstream* GetExportStream(const CArgs& args) const; 537 }; 538 539 /// Argument class to collect options specific to PSI-BLAST 540 class NCBI_BLASTINPUT_EXPORT CPsiBlastArgs : public IBlastCmdLineArgs 541 { 542 public: 543 /// Enumeration to determine the molecule type of the database 544 enum ETargetDatabase { 545 eProteinDb, ///< Traditional, iterated PSI-BLAST 546 eNucleotideDb ///< PSI-Tblastn, non-iterated 547 }; 548 549 /** 550 * @brief Constructor 551 * 552 * @param db_target Molecule type of the database 553 * @param is_deltablast Are the aruments set up for Delta Blast 554 */ CPsiBlastArgs(ETargetDatabase db_target=eProteinDb,bool is_deltablast=false)555 CPsiBlastArgs(ETargetDatabase db_target = eProteinDb, 556 bool is_deltablast = false) 557 : m_DbTarget(db_target), m_NumIterations(1), 558 m_CheckPointOutput(0), m_AsciiMatrixOutput(0), 559 m_IsDeltaBlast(is_deltablast), 560 m_SaveLastPssm(false) 561 {}; 562 563 /// Our virtual destructor ~CPsiBlastArgs()564 virtual ~CPsiBlastArgs() {} 565 566 /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */ 567 virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc); 568 /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */ 569 virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args, 570 CBlastOptions& options); 571 572 /// Retrieve the number of iterations to 
perform GetNumberOfIterations() const573 size_t GetNumberOfIterations() const { 574 return m_NumIterations; 575 } 576 577 /// Retrieve the number of iterations to perform SetNumberOfIterations(unsigned int num_iters)578 void SetNumberOfIterations(unsigned int num_iters) { 579 m_NumIterations = num_iters; 580 } 581 /// Returns true if checkpoint PSSM is required to be printed RequiresCheckPointOutput() const582 bool RequiresCheckPointOutput() const { 583 return m_CheckPointOutput != NULL; 584 } 585 /// Get the checkpoint file output stream 586 /// @return pointer to output stream, not to be free'd by the caller GetCheckPointOutputStream()587 CNcbiOstream* GetCheckPointOutputStream() { 588 return m_CheckPointOutput ? m_CheckPointOutput->GetStream() : NULL; 589 } 590 /// Returns true if ASCII PSSM is required to be printed RequiresAsciiPssmOutput() const591 bool RequiresAsciiPssmOutput() const { 592 return m_AsciiMatrixOutput != NULL; 593 } 594 /// Get the ASCII matrix output stream 595 /// @return pointer to output stream, not to be free'd by the caller GetAsciiMatrixOutputStream()596 CNcbiOstream* GetAsciiMatrixOutputStream() { 597 return m_AsciiMatrixOutput ? 
m_AsciiMatrixOutput->GetStream() : NULL; 598 } 599 600 /// Get the PSSM read from checkpoint file GetInputPssm() const601 CRef<objects::CPssmWithParameters> GetInputPssm() const { 602 return m_Pssm; 603 } 604 605 /// Set the PSSM read from saved search strategy SetInputPssm(CRef<objects::CPssmWithParameters> pssm)606 void SetInputPssm(CRef<objects::CPssmWithParameters> pssm) { 607 m_Pssm = pssm; 608 } 609 610 /// Should the PSSM after the last database search be saved GetSaveLastPssm(void) const611 bool GetSaveLastPssm(void) const { 612 return m_SaveLastPssm; 613 } 614 615 /// Set the on/off switch for saving PSSM after the last database search SetSaveLastPssm(bool b)616 void SetSaveLastPssm(bool b) { 617 m_SaveLastPssm = b; 618 } 619 620 private: 621 /// Molecule of the database 622 ETargetDatabase m_DbTarget; 623 /// number of iterations to perform 624 size_t m_NumIterations; 625 /// checkpoint output file 626 CRef<CAutoOutputFileReset> m_CheckPointOutput; 627 /// ASCII matrix output file 628 CRef<CAutoOutputFileReset> m_AsciiMatrixOutput; 629 /// PSSM 630 CRef<objects::CPssmWithParameters> m_Pssm; 631 632 /// Are the aruments set up for Delta Blast 633 bool m_IsDeltaBlast; 634 635 /// Save PSSM after the last database search 636 bool m_SaveLastPssm; 637 638 /// Prohibit copy constructor 639 CPsiBlastArgs(const CPsiBlastArgs& rhs); 640 /// Prohibit assignment operator 641 CPsiBlastArgs& operator=(const CPsiBlastArgs& rhs); 642 643 /// Auxiliary function to create a PSSM from a multiple sequence alignment 644 /// file 645 CRef<objects::CPssmWithParameters> 646 x_CreatePssmFromMsa(CNcbiIstream& input_stream, CBlastOptions& opt, 647 bool save_ascii_pssm, unsigned int msa_master_idx, 648 bool ignore_pssm_tmpl_seq); 649 }; 650 651 /// Argument class to collect options specific to PHI-BLAST 652 class NCBI_BLASTINPUT_EXPORT CPhiBlastArgs : public IBlastCmdLineArgs 653 { 654 public: 655 /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */ 656 virtual 
void SetArgumentDescriptions(CArgDescriptions& arg_desc); 657 /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */ 658 virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args, 659 CBlastOptions& options); 660 }; 661 662 /// Argument class to collect options specific to KBLASTP 663 class NCBI_BLASTINPUT_EXPORT CKBlastpArgs : public IBlastCmdLineArgs 664 { 665 public: 666 667 /// Constructor CKBlastpArgs(void)668 CKBlastpArgs(void) : m_JDistance(0.10), m_MinHits(0), m_CandidateSeqs(1000) {} 669 670 /// Our virtual destructor ~CKBlastpArgs()671 virtual ~CKBlastpArgs() {} 672 673 /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */ 674 virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc); 675 /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */ 676 virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args, 677 CBlastOptions& options); 678 679 680 /// Get the Jaccard distance GetJaccardDistance(void)681 double GetJaccardDistance(void) { return m_JDistance;} 682 683 /// Get the minimum number of LSH matches. GetMinHits(void)684 int GetMinHits(void) {return m_MinHits;} 685 686 /// The database GetDatabase(void)687 string GetDatabase(void) {return m_DbIndex;} 688 689 /// Number of candidate sequences to attempt with BLASTP GetCandidateSeqs(void)690 int GetCandidateSeqs(void) {return m_CandidateSeqs;} 691 692 private: 693 /// Prohibit copy constructor 694 CKBlastpArgs(const CKBlastpArgs& rhs); 695 /// Prohibit assignment operator 696 CKBlastpArgs& operator=(const CKBlastpArgs& rhs); 697 698 /// Jaccard distance 699 double m_JDistance; 700 701 /// Minimum number of hits in LSH phase 702 int m_MinHits; 703 704 /// Database/index 705 string m_DbIndex; 706 707 /// Number of candidate sequences to try BLAST on. 
708 int m_CandidateSeqs; 709 }; 710 711 /// Argument class to collect options specific to DELTA-BLAST 712 class NCBI_BLASTINPUT_EXPORT CDeltaBlastArgs : public IBlastCmdLineArgs 713 { 714 public: 715 716 /// Constructor CDeltaBlastArgs(void)717 CDeltaBlastArgs(void) : m_ShowDomainHits(false) {} 718 719 /// Our virtual destructor ~CDeltaBlastArgs()720 virtual ~CDeltaBlastArgs() {} 721 722 /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */ 723 virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc); 724 /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */ 725 virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args, 726 CBlastOptions& options); 727 728 /// Get domain database GetDomainDatabase(void)729 CRef<CSearchDatabase> GetDomainDatabase(void) 730 {return m_DomainDb;} 731 732 /// Get show domain hits option value GetShowDomainHits(void) const733 bool GetShowDomainHits(void) const {return m_ShowDomainHits;} 734 735 private: 736 /// Prohibit copy constructor 737 CDeltaBlastArgs(const CDeltaBlastArgs& rhs); 738 /// Prohibit assignment operator 739 CDeltaBlastArgs& operator=(const CDeltaBlastArgs& rhs); 740 741 private: 742 743 /// Conserved Domain Database 744 CRef<CSearchDatabase> m_DomainDb; 745 746 /// Is printing CDD hits requested 747 bool m_ShowDomainHits; 748 }; 749 750 751 class NCBI_BLASTINPUT_EXPORT CMappingArgs : public IBlastCmdLineArgs 752 { 753 public: CMappingArgs(void)754 CMappingArgs(void) {} 755 756 /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */ 757 virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc); 758 /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */ 759 virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args, 760 CBlastOptions& options); 761 762 }; 763 764 /*****************************************************************************/ 765 // Input options 766 767 /// Argument class to collect query options 768 class 
NCBI_BLASTINPUT_EXPORT CQueryOptionsArgs : public IBlastCmdLineArgs 769 { 770 public: 771 /** 772 * @brief Constructor 773 * 774 * @param query_cannot_be_nucl can the query not be nucleotide? 775 */ CQueryOptionsArgs(bool query_cannot_be_nucl=false)776 CQueryOptionsArgs(bool query_cannot_be_nucl = false) 777 : m_Strand(objects::eNa_strand_unknown), m_Range(), 778 m_UseLCaseMask(kDfltArgUseLCaseMasking), 779 m_ParseDeflines(kDfltArgParseDeflines), 780 m_QueryCannotBeNucl(query_cannot_be_nucl) 781 {}; 782 783 /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */ 784 virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc); 785 /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */ 786 virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args, 787 CBlastOptions& options); 788 789 /// Get query sequence range restriction GetRange() const790 TSeqRange GetRange() const { return m_Range; } 791 /// Set query sequence range restriction SetRange(const TSeqRange & range)792 void SetRange(const TSeqRange& range) { m_Range = range; } 793 /// Get strand to search in query sequence(s) GetStrand() const794 objects::ENa_strand GetStrand() const { return m_Strand; } 795 /// Use lowercase masking in FASTA input? UseLowercaseMasks() const796 bool UseLowercaseMasks() const { return m_UseLCaseMask; } 797 /// Should the defline be parsed? GetParseDeflines() const798 bool GetParseDeflines() const { return m_ParseDeflines; } 799 800 /// Is the query sequence protein? QueryIsProtein() const801 bool QueryIsProtein() const { return m_QueryCannotBeNucl; } 802 803 private: 804 /// Strand(s) to search 805 objects::ENa_strand m_Strand; 806 /// range to restrict the query sequence(s) 807 TSeqRange m_Range; 808 /// use lowercase masking in FASTA input 809 bool m_UseLCaseMask; 810 /// Should the deflines be parsed? 
811 bool m_ParseDeflines; 812 813 /// only false for blast[xn], and tblastx 814 /// true in case of PSI-BLAST 815 bool m_QueryCannotBeNucl; 816 }; 817 818 /// Argument class to collect query options for BLAST Mapper 819 class NCBI_BLASTINPUT_EXPORT CMapperQueryOptionsArgs : public CQueryOptionsArgs 820 { 821 public: 822 823 /// Input formats 824 enum EInputFormat { 825 eFasta = 0, 826 eFastc, 827 eFastq, 828 eASN1text, 829 eASN1bin, 830 eSra 831 }; 832 833 CMapperQueryOptionsArgs(void)834 CMapperQueryOptionsArgs(void) 835 : CQueryOptionsArgs(false), 836 m_IsPaired(false), 837 m_InputFormat(eFasta), 838 m_MateInputStream(NULL), 839 m_EnableSraCache(false) 840 {} 841 842 /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */ 843 virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc); 844 /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */ 845 virtual void ExtractAlgorithmOptions(const CArgs& args, CBlastOptions& opt); 846 847 /// Are query sequences paired IsPaired(void) const848 bool IsPaired(void) const {return m_IsPaired;} 849 850 /// Are queries provided in Fastc format GetInputFormat(void) const851 EInputFormat GetInputFormat(void) const 852 {return m_InputFormat;} 853 854 /// Does the mate input stream exits HasMateInputStream(void) const855 bool HasMateInputStream(void) const {return m_MateInputStream;} 856 857 /// Get input stream for query mates GetMateInputStream(void) const858 CNcbiIstream* GetMateInputStream(void) const {return m_MateInputStream;} 859 860 /// Get a list of SRA accessions GetSraAccessions(void) const861 const vector<string>& GetSraAccessions(void) const 862 {return m_SraAccessions;} 863 864 /// Is SRA caching in local files enabled 865 /// (see File Caching at 866 /// https://github.com/ncbi/sra-tools/wiki/Toolkit-Configuration) IsSraCacheEnabled(void) const867 bool IsSraCacheEnabled(void) const {return m_EnableSraCache;} 868 869 private: 870 bool m_IsPaired; 871 EInputFormat m_InputFormat; 
872 vector<string> m_SraAccessions; 873 874 CNcbiIstream* m_MateInputStream; 875 auto_ptr<CDecompressIStream> m_DecompressIStream; 876 877 bool m_EnableSraCache; 878 }; 879 880 881 /// Argument class to collect database/subject arguments 882 class NCBI_BLASTINPUT_EXPORT CBlastDatabaseArgs : public IBlastCmdLineArgs 883 { 884 public: 885 /// The default priority for subjects, should be used for 886 /// subjects/databases 887 static const int kSubjectsDataLoaderPriority = 10; 888 889 /// alias for the database molecule type 890 typedef CSearchDatabase::EMoleculeType EMoleculeType; 891 892 /// Auxiliary function to determine if the database/subject sequence has 893 /// been set 894 static bool HasBeenSet(const CArgs& args); 895 896 /// Constructor 897 /// @param request_mol_type If true, the command line arguments will 898 /// include a mandatory option to disambiguate whether a protein or a 899 /// nucleotide database is searched 900 /// @param is_rpsblast is it RPS-BLAST? 901 /// @param is_igblast is it IG-BLAST? 902 /// @param is_deltablast is it DELTA-BLAST? 903 CBlastDatabaseArgs(bool request_mol_type = false, 904 bool is_rpsblast = false, 905 bool is_igblast = false, 906 bool is_mapper = false, 907 bool is_kblast = false); 908 /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */ 909 virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc); 910 /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */ 911 virtual void ExtractAlgorithmOptions(const CArgs& args, 912 CBlastOptions& opts); 913 914 /// Turns on/off database masking support SetDatabaseMaskingSupport(bool val)915 void SetDatabaseMaskingSupport(bool val) { 916 m_SupportsDatabaseMasking = val; 917 } 918 919 /// Is the database/subject protein? 
IsProtein() const920 bool IsProtein() const { return m_IsProtein; } 921 922 /// Get the BLAST database name 923 /// @return empty string in the case of BLAST2Sequences, otherwise the 924 /// BLAST database name GetDatabaseName() const925 string GetDatabaseName() const { 926 return m_SearchDb.Empty() ? kEmptyStr : m_SearchDb->GetDatabaseName(); 927 } 928 929 /// Retrieve the search database information GetSearchDatabase() const930 CRef<CSearchDatabase> GetSearchDatabase() const { return m_SearchDb; } 931 /// Set the search database information. 932 /// use case: recovering from search strategy SetSearchDatabase(CRef<CSearchDatabase> search_db)933 void SetSearchDatabase(CRef<CSearchDatabase> search_db) { 934 m_SearchDb = search_db; 935 m_IsProtein = search_db->IsProtein(); 936 } 937 938 /// Sets the subject sequences. 939 /// use case: recovering from search strategy SetSubjects(CRef<IQueryFactory> subjects,CRef<CScope> scope,bool is_protein)940 void SetSubjects(CRef<IQueryFactory> subjects, CRef<CScope> scope, 941 bool is_protein) { 942 m_Subjects = subjects; 943 m_Scope = scope; 944 m_IsProtein = is_protein; 945 } 946 947 /// Retrieve subject sequences, if provided 948 /// @param scope scope to which to sequence read will be added (if 949 /// non-NULL) [in] 950 /// @return empty CRef<> if no subjects were provided, otherwise a properly 951 /// initialized IQueryFactory object GetSubjects(objects::CScope * scope=NULL)952 CRef<IQueryFactory> GetSubjects(objects::CScope* scope = NULL) { 953 if (m_Subjects && scope) { 954 // m_Scope contains the subject(s) read 955 _ASSERT(m_Scope.NotEmpty()); 956 // Add the scope with a lower priority to avoid conflicts 957 scope->AddScope(*m_Scope, kSubjectsDataLoaderPriority); 958 } 959 return m_Subjects; 960 } 961 SetIPGFilteringSupport(bool val)962 void SetIPGFilteringSupport(bool val) { 963 m_SupportIPGFiltering = val; 964 } 965 966 protected: 967 CRef<CSearchDatabase> m_SearchDb;/**< Description of the BLAST database */ 968 bool 
m_RequestMoleculeType; /**< Determines whether the database's 969 molecule type should be requested in the 970 command line, true in case of PSI-BLAST 971 */ 972 bool m_IsRpsBlast; /**< true if the search is RPS-BLAST */ 973 bool m_IsIgBlast; /**< true if the search is Ig-BLAST */ 974 975 bool m_IsProtein; /**< Is the database/subject(s) protein? */ 976 bool m_IsMapper; /**< true for short read mapper */ 977 bool m_IsKBlast; /**< true for Kblastp */ 978 CRef<IQueryFactory> m_Subjects; /**< The subject sequences */ 979 CRef<objects::CScope> m_Scope; /**< CScope object in which all subject 980 sequences read are kept */ 981 bool m_SupportsDatabaseMasking; /**< true if it's supported */ 982 bool m_SupportIPGFiltering; /**< true if IPG filtering is supported */ 983 }; 984 985 /// Argument class to collect options specific to igBLAST 986 class NCBI_BLASTINPUT_EXPORT CIgBlastArgs : public IBlastCmdLineArgs 987 { 988 public: CIgBlastArgs(bool is_protein)989 CIgBlastArgs(bool is_protein) : m_IsProtein(is_protein) {}; 990 991 /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */ 992 virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc); 993 /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */ 994 virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args, 995 CBlastOptions& options); 996 GetIgBlastOptions()997 CRef<CIgBlastOptions> GetIgBlastOptions() { return m_IgOptions; } 998 AddIgSequenceScope(CRef<objects::CScope> scope)999 void AddIgSequenceScope(CRef<objects::CScope> scope) { 1000 1001 if (m_Scope.NotEmpty()) { 1002 // Add the scope with a lower priority to avoid conflicts 1003 scope->AddScope(*m_Scope, 1004 CBlastDatabaseArgs::kSubjectsDataLoaderPriority); 1005 } 1006 } 1007 1008 private: 1009 /// Is this a protein search? 
1010 bool m_IsProtein; 1011 /// Igblast options to fill 1012 CRef<CIgBlastOptions> m_IgOptions; 1013 /// scope to get sequences 1014 CRef<objects::CScope> m_Scope; 1015 }; 1016 1017 /// Argument class to collect formatting options, use this to create a 1018 /// CBlastFormat object. 1019 /// @note This object is also needed to set the maximum number of target 1020 /// sequences to save (hitlist size) 1021 class NCBI_BLASTINPUT_EXPORT CFormattingArgs : public IBlastCmdLineArgs 1022 { 1023 public: 1024 /// Defines the output formats supported by our command line formatter 1025 enum EOutputFormat { 1026 /// Standard pairwise alignments 1027 ePairwise = 0, 1028 ///< Query anchored showing identities 1029 eQueryAnchoredIdentities, 1030 ///< Query anchored no identities 1031 eQueryAnchoredNoIdentities, 1032 ///< Flat query anchored showing identities 1033 eFlatQueryAnchoredIdentities, 1034 ///< Flat query anchored no identities 1035 eFlatQueryAnchoredNoIdentities, 1036 /// XML output 1037 eXml, 1038 /// Tabular output 1039 eTabular, 1040 /// Tabular output with comments 1041 eTabularWithComments, 1042 /// ASN.1 text output 1043 eAsnText, 1044 /// ASN.1 binary output 1045 eAsnBinary, 1046 /// Comma-separated values 1047 eCommaSeparatedValues, 1048 /// BLAST archive format 1049 eArchiveFormat, 1050 /// JSON seq-align 1051 eJsonSeqalign, 1052 /// JSON XInclude 1053 eJson, 1054 /// XML2 XInclude 1055 eXml2, 1056 /// JSON2 single file 1057 eJson_S, 1058 /// XML2 single file 1059 eXml2_S, 1060 /// SAM format 1061 eSAM, 1062 1063 eTaxFormat, 1064 1065 ///igblast AIRR rearrangement, 19 1066 eAirrRearrangement, 1067 1068 /// unaligned reads in magicblast 1069 eFasta, 1070 /// Sentinel value for error checking 1071 eEndValue 1072 1073 }; 1074 1075 enum EFormatFlags { 1076 eDefaultFlag = 0, 1077 // Set if VDB 1078 eIsVDB = 0x01, 1079 // Set if SAM format is supported 1080 eIsSAM = 0x02, 1081 // Set if both VDB and SAM is true 1082 eIsVDB_SAM = eIsVDB | eIsSAM, 1083 //Is 
eAirrRearrangement format supported? 1084 eIsAirrRearrangement = 0x04 1085 }; 1086 /// Default constructor CFormattingArgs(bool isIgblast=false,EFormatFlags flag=eDefaultFlag)1087 CFormattingArgs(bool isIgblast = false, EFormatFlags flag = eDefaultFlag) 1088 : m_OutputFormat(ePairwise), m_ShowGis(false), 1089 m_NumDescriptions(0), m_NumAlignments(0), 1090 m_DfltNumDescriptions(0), m_DfltNumAlignments(0), 1091 m_Html(false), 1092 m_IsIgBlast(isIgblast), 1093 m_LineLength(align_format::kDfltLineLength), 1094 m_FormatFlags(flag), 1095 m_HitsSortOption(-1), 1096 m_HspsSortOption(-1) 1097 { 1098 if (m_IsIgBlast) { 1099 m_DfltNumAlignments = m_DfltNumDescriptions = 10; 1100 } else { 1101 m_DfltNumAlignments = align_format::kDfltArgNumAlignments; 1102 m_DfltNumDescriptions = align_format::kDfltArgNumDescriptions; 1103 } 1104 }; 1105 1106 /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */ 1107 virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc); 1108 /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */ 1109 virtual void ExtractAlgorithmOptions(const CArgs& args, 1110 CBlastOptions& opts); 1111 1112 /// Parses the output format command line option value, returns the 1113 /// requested output format type and any custom output formats (if 1114 /// any and applicable) 1115 /// @param args Command line arguments object [in] 1116 /// @param fmt_type Output format type requested in command line options 1117 /// [out] 1118 /// @param custom_fmt_spec Custom output format specification in command 1119 /// line options [out] 1120 virtual void 1121 ParseFormattingString(const CArgs& args, 1122 EOutputFormat& fmt_type, 1123 string& custom_fmt_spec, 1124 string& custom_delim) const; 1125 1126 /// Get the choice of formatted output GetFormattedOutputChoice() const1127 EOutputFormat GetFormattedOutputChoice() const { 1128 return m_OutputFormat; 1129 } 1130 1131 /// Returns true if the desired output format is structured (needed to 1132 
/// determine whether to print or not that a PSI-BLAST search has 1133 /// converged - this is not supported in structured formats) HasStructuredOutputFormat() const1134 bool HasStructuredOutputFormat() const { 1135 return m_OutputFormat == eXml || 1136 m_OutputFormat == eAsnText || 1137 m_OutputFormat == eAsnBinary || 1138 m_OutputFormat == eXml2 || 1139 m_OutputFormat == eJson || 1140 m_OutputFormat == eXml2_S || 1141 m_OutputFormat == eJson_S || 1142 m_OutputFormat == eJsonSeqalign || 1143 m_OutputFormat == eSAM; 1144 } 1145 1146 /// Display the NCBI GIs in formatted output? ShowGis() const1147 bool ShowGis() const { 1148 return m_ShowGis; 1149 } 1150 /// Number of one-line descriptions to show in traditional BLAST output GetNumDescriptions() const1151 TSeqPos GetNumDescriptions() const { 1152 return m_NumDescriptions; 1153 } 1154 /// Number of alignments to show in traditional BLAST output GetNumAlignments() const1155 TSeqPos GetNumAlignments() const { 1156 return m_NumAlignments; 1157 } 1158 /// Display HTML output? DisplayHtmlOutput() const1159 bool DisplayHtmlOutput() const { 1160 return m_Html; 1161 } 1162 1163 /// Retrieve for string that specifies the custom output format for tabular 1164 /// and comma-separated value GetCustomOutputFormatSpec() const1165 string GetCustomOutputFormatSpec() const { 1166 return m_CustomOutputFormatSpec; 1167 } 1168 1169 virtual bool ArchiveFormatRequested(const CArgs& args) const; 1170 GetLineLength() const1171 size_t GetLineLength() const { 1172 return m_LineLength; 1173 } GetHitsSortOption() const1174 int GetHitsSortOption() const { 1175 return m_HitsSortOption; 1176 } GetHspsSortOption() const1177 int GetHspsSortOption() const { 1178 return m_HspsSortOption; 1179 } GetCustomDelimiter()1180 string GetCustomDelimiter(){return m_CustomDelim;} 1181 1182 protected: 1183 EOutputFormat m_OutputFormat; ///< Choice of formatting output 1184 bool m_ShowGis; ///< Display NCBI GIs? 
1185 TSeqPos m_NumDescriptions; ///< Number of 1-line descr. to show 1186 TSeqPos m_NumAlignments; ///< Number of alignments to show 1187 TSeqPos m_DfltNumDescriptions; ///< Default value for num descriptions 1188 TSeqPos m_DfltNumAlignments; ///< Default value for num alignments 1189 bool m_Html; ///< Display HTML output? 1190 bool m_IsIgBlast; ///< IgBlast has a different default num_alignments 1191 /// The format specification for custom output, e.g.: tabular or 1192 /// comma-separated value (populated if applicable) 1193 string m_CustomOutputFormatSpec; 1194 size_t m_LineLength; 1195 EFormatFlags m_FormatFlags; 1196 int m_HitsSortOption; 1197 int m_HspsSortOption; 1198 string m_CustomDelim; 1199 }; 1200 1201 /// Formatting args for magicblast advertising only SAM and fast tabular 1202 /// formats 1203 class NCBI_BLASTINPUT_EXPORT CMapperFormattingArgs : public CFormattingArgs 1204 { 1205 public: 1206 CMapperFormattingArgs(void)1207 CMapperFormattingArgs(void) : 1208 CFormattingArgs(), 1209 m_TrimReadIds(true), 1210 m_PrintUnaligned(true), 1211 m_NoDiscordant(false), 1212 m_FwdRev(false), 1213 m_RevFwd(false), 1214 m_FwdOnly(false), 1215 m_RevOnly(false), 1216 m_OnlyStrandSpecific(false), 1217 m_PrintMdTag(false), 1218 m_UnalignedOutputFormat(eSAM) 1219 {} 1220 1221 virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc); 1222 1223 virtual void ExtractAlgorithmOptions(const CArgs& args, CBlastOptions& opt); 1224 ArchiveFormatRequested(const CArgs &) const1225 virtual bool ArchiveFormatRequested(const CArgs& /*args*/) const { 1226 return false; 1227 } 1228 1229 /// Should read ids be in SAM format be trimmed of .1 and .2 endings 1230 /// for paired mapping TrimReadIds(void) const1231 bool TrimReadIds(void) const {return m_TrimReadIds;} 1232 1233 /// Should unaligned reads be reported PrintUnaligned(void) const1234 bool PrintUnaligned(void) const {return m_PrintUnaligned;} 1235 1236 /// Should non-concordant pairs be filtered out of report 
NoDiscordant(void) const1237 bool NoDiscordant(void) const {return m_NoDiscordant;} 1238 1239 /// Specify fwd/ref strands SelectFwdRev(void) const1240 bool SelectFwdRev(void) const {return m_FwdRev;} 1241 1242 /// Specify rev/fwd strands SelectRevFwd(void) const1243 bool SelectRevFwd(void) const {return m_RevFwd;} 1244 1245 /// Specify fwd-only strands SelectFwdOnly(void) const1246 bool SelectFwdOnly(void) const {return m_FwdOnly;} 1247 1248 /// Specify rev-only strands SelectRevOnly(void) const1249 bool SelectRevOnly(void) const {return m_RevOnly;} 1250 1251 /// Specify only-strand-specific SelectOnlyStrandSpecific(void) const1252 bool SelectOnlyStrandSpecific(void) const {return m_OnlyStrandSpecific;} 1253 1254 /// Should MD tag be included in SAM report PrintMdTag(void) const1255 bool PrintMdTag(void) const {return m_PrintMdTag;} 1256 1257 /// Get format choice for unaligned reads GetUnalignedOutputFormat(void) const1258 EOutputFormat GetUnalignedOutputFormat(void) const 1259 {return m_UnalignedOutputFormat;} 1260 1261 private: 1262 bool m_TrimReadIds; 1263 bool m_PrintUnaligned; 1264 bool m_NoDiscordant; 1265 bool m_FwdRev; 1266 bool m_RevFwd; 1267 bool m_FwdOnly; 1268 bool m_RevOnly; 1269 bool m_OnlyStrandSpecific; 1270 bool m_PrintMdTag; 1271 EOutputFormat m_UnalignedOutputFormat; 1272 }; 1273 1274 /// Argument class to collect multi-threaded arguments 1275 class NCBI_BLASTINPUT_EXPORT CMTArgs : public IBlastCmdLineArgs 1276 { 1277 public: 1278 enum EMTMode { 1279 eNotSupported = -1, 1280 eSplitByDB, 1281 eSplitByQueries 1282 }; 1283 /// Default Constructor CMTArgs(size_t default_num_threads=CThreadable::kMinNumThreads,EMTMode mt_mode=eNotSupported)1284 CMTArgs(size_t default_num_threads = CThreadable::kMinNumThreads, EMTMode mt_mode = eNotSupported) : 1285 m_NumThreads(default_num_threads), m_MTMode(mt_mode) 1286 { 1287 #ifdef NCBI_NO_THREADS 1288 // No threads can be set in NON-MT mode 1289 m_NumThreads = CThreadable::kMinNumThreads; 1290 m_MTMode = 
eNotSupported; 1291 #endif 1292 } 1293 CMTArgs(const CArgs& cmd_line_args); 1294 /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */ 1295 virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc); 1296 /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */ 1297 virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args, 1298 CBlastOptions& options); 1299 1300 /// Get the number of threads to spawn GetNumThreads() const1301 size_t GetNumThreads() const { return m_NumThreads; } 1302 GetMTMode() const1303 int GetMTMode() const { return m_MTMode; } 1304 1305 protected: 1306 void x_ExtractAlgorithmOptions(const CArgs& args); 1307 size_t m_NumThreads; ///< Number of threads to spawn 1308 EMTMode m_MTMode; 1309 }; 1310 1311 /// Argument class to collect remote vs. local execution 1312 class NCBI_BLASTINPUT_EXPORT CRemoteArgs : public IBlastCmdLineArgs 1313 { 1314 public: 1315 /// Default constructor CRemoteArgs()1316 CRemoteArgs() : m_IsRemote(false) {} 1317 /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */ 1318 virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc); 1319 /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */ 1320 virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args, 1321 CBlastOptions& options); 1322 1323 /// Return whether the search should be executed remotely or not ExecuteRemotely() const1324 bool ExecuteRemotely() const { return m_IsRemote; } 1325 1326 private: 1327 /// Should the search be executed remotely? 1328 bool m_IsRemote; 1329 }; 1330 1331 /// Argument class to collect debugging options. 
1332 /// Only show in command line if compiled with _BLAST_DEBUG 1333 class NCBI_BLASTINPUT_EXPORT CDebugArgs : public IBlastCmdLineArgs 1334 { 1335 public: 1336 /// Default constructor CDebugArgs()1337 CDebugArgs() : m_DebugOutput(false), m_RmtDebugOutput(false) {} 1338 /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */ 1339 virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc); 1340 /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */ 1341 virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args, 1342 CBlastOptions& options); 1343 1344 /// Return whether debug (verbose) output should be produced on remote 1345 /// searches (only available when compiled with _DEBUG) ProduceDebugRemoteOutput() const1346 bool ProduceDebugRemoteOutput() const { return m_RmtDebugOutput; } 1347 /// Return whether debug (verbose) output should be produced 1348 /// (only available when compiled with _DEBUG) ProduceDebugOutput() const1349 bool ProduceDebugOutput() const { return m_DebugOutput; } 1350 private: 1351 1352 /// Should debugging (verbose) output be printed 1353 bool m_DebugOutput; 1354 /// Should debugging (verbose) output be printed for remote BLAST 1355 bool m_RmtDebugOutput; 1356 }; 1357 1358 /// Argument class to retrieve options for filtering HSPs (e.g.: culling 1359 /// options, best hit algorithm options) 1360 class NCBI_BLASTINPUT_EXPORT CHspFilteringArgs : public IBlastCmdLineArgs 1361 { 1362 public: 1363 /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */ 1364 virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc); 1365 /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */ 1366 virtual void ExtractAlgorithmOptions(const CArgs& args, 1367 CBlastOptions& opts); 1368 }; 1369 1370 /// Argument class to retrieve megablast database indexing options 1371 class NCBI_BLASTINPUT_EXPORT CMbIndexArgs : public IBlastCmdLineArgs 1372 { 1373 public: 1374 /** Interface method, 
\sa IBlastCmdLineArgs::SetArgumentDescriptions */ 1375 virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc); 1376 /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */ 1377 virtual void ExtractAlgorithmOptions(const CArgs& args, 1378 CBlastOptions& opts); 1379 1380 /// Auxiliary function to determine if the megablast database indexing 1381 /// options have been set 1382 static bool HasBeenSet(const CArgs& args); 1383 }; 1384 1385 /// Type definition of a container of IBlastCmdLineArgs 1386 typedef vector< CRef<IBlastCmdLineArgs> > TBlastCmdLineArgs; 1387 1388 1389 /// Base command line argument class for a generic BLAST command line binary 1390 class NCBI_BLASTINPUT_EXPORT CBlastAppArgs : public CObject 1391 { 1392 public: 1393 /// Default constructor 1394 CBlastAppArgs(); 1395 /// Our virtual destructor ~CBlastAppArgs()1396 virtual ~CBlastAppArgs() {} 1397 1398 /// Set the command line arguments 1399 CArgDescriptions* SetCommandLine(); 1400 1401 /// Get the task for this object GetTask() const1402 string GetTask() const { 1403 return m_Task; 1404 } 1405 1406 /// Set the task for this object 1407 /// @param task task name to set [in] 1408 void SetTask(const string& task); 1409 1410 /// Extract the command line arguments into a CBlastOptionsHandle object 1411 /// @param args Commad line arguments [in] 1412 CRef<CBlastOptionsHandle> SetOptions(const CArgs& args); 1413 1414 /// Combine the command line arguments into a CBlastOptions object 1415 /// recovered from saved search strategy 1416 /// @param args Commad line arguments [in] 1417 CRef<CBlastOptionsHandle> SetOptionsForSavedStrategy(const CArgs& args); 1418 1419 /// Setter for the BLAST options handle, this is used if the options are 1420 /// recovered from a saved BLAST search strategy SetOptionsHandle(CRef<CBlastOptionsHandle> opts_hndl)1421 void SetOptionsHandle(CRef<CBlastOptionsHandle> opts_hndl) { 1422 m_OptsHandle = opts_hndl; 1423 } 1424 1425 /// Get the BLAST database 
arguments GetBlastDatabaseArgs() const1426 CRef<CBlastDatabaseArgs> GetBlastDatabaseArgs() const { 1427 return m_BlastDbArgs; 1428 } 1429 /// Set the BLAST database arguments SetBlastDatabaseArgs(CRef<CBlastDatabaseArgs> args)1430 void SetBlastDatabaseArgs(CRef<CBlastDatabaseArgs> args) { 1431 m_BlastDbArgs = args; 1432 } 1433 1434 /// Get the options for the query sequence(s) GetQueryOptionsArgs() const1435 CRef<CQueryOptionsArgs> GetQueryOptionsArgs() const { 1436 return m_QueryOptsArgs; 1437 } 1438 1439 /// Get the formatting options GetFormattingArgs() const1440 CRef<CFormattingArgs> GetFormattingArgs() const { 1441 return m_FormattingArgs; 1442 } 1443 1444 /// Get the number of threads to spawn GetNumThreads() const1445 size_t GetNumThreads() const { 1446 return m_MTArgs->GetNumThreads(); 1447 } 1448 1449 /// Get the input stream 1450 virtual CNcbiIstream& GetInputStream(); 1451 1452 /// Get the output stream 1453 virtual CNcbiOstream& GetOutputStream(); 1454 1455 /// Set the input stream to a temporary input file (needed when importing 1456 /// a search strategy) 1457 /// @param input_file temporary input file to read [in] SetInputStream(CRef<CTmpFile> input_file)1458 void SetInputStream(CRef<CTmpFile> input_file) { 1459 m_StdCmdLineArgs->SetInputStream(input_file); 1460 } 1461 1462 /// Get the input stream for the search strategy GetImportSearchStrategyStream(const CArgs & args)1463 CNcbiIstream* GetImportSearchStrategyStream(const CArgs& args) { 1464 return m_SearchStrategyArgs->GetImportStream(args); 1465 } 1466 /// Get the output stream for the search strategy GetExportSearchStrategyStream(const CArgs & args)1467 CNcbiOstream* GetExportSearchStrategyStream(const CArgs& args) { 1468 return m_SearchStrategyArgs->GetExportStream(args); 1469 } 1470 1471 /// Determine whether the search should be executed remotely or not ExecuteRemotely() const1472 bool ExecuteRemotely() const { 1473 return m_RemoteArgs->ExecuteRemotely(); 1474 } 1475 1476 /// Return whether 
debug (verbose) output should be produced on remote 1477 /// searches (only available when compiled with _DEBUG) ProduceDebugRemoteOutput() const1478 bool ProduceDebugRemoteOutput() const { 1479 return m_DebugArgs->ProduceDebugRemoteOutput(); 1480 } 1481 1482 /// Return whether debug (verbose) output should be produced on remote 1483 /// searches (only available when compiled with _DEBUG) ProduceDebugOutput() const1484 bool ProduceDebugOutput() const { 1485 return m_DebugArgs->ProduceDebugOutput(); 1486 } 1487 1488 /// Get the query batch size 1489 virtual int GetQueryBatchSize() const = 0; 1490 1491 /// Retrieve the client ID for remote requests GetClientId() const1492 string GetClientId() const { 1493 _ASSERT( !m_ClientId.empty() ); 1494 return m_ClientId; 1495 } 1496 1497 protected: 1498 /// Set of command line argument objects 1499 TBlastCmdLineArgs m_Args; 1500 /// query options object 1501 CRef<CQueryOptionsArgs> m_QueryOptsArgs; 1502 /// database/subject object 1503 CRef<CBlastDatabaseArgs> m_BlastDbArgs; 1504 /// formatting options 1505 CRef<CFormattingArgs> m_FormattingArgs; 1506 /// multi-threaded options 1507 CRef<CMTArgs> m_MTArgs; 1508 /// remote vs. 
local execution options 1509 CRef<CRemoteArgs> m_RemoteArgs; 1510 /// standard command line arguments class 1511 CRef<CStdCmdLineArgs> m_StdCmdLineArgs; 1512 /// arguments for dealing with search strategies 1513 CRef<CSearchStrategyArgs> m_SearchStrategyArgs; 1514 /// Debugging arguments 1515 CRef<CDebugArgs> m_DebugArgs; 1516 /// HSP filtering arguments 1517 CRef<CHspFilteringArgs> m_HspFilteringArgs; 1518 /// The BLAST options handle, only non-NULL if assigned via 1519 /// SetOptionsHandle, i.e.: from a saved search strategy 1520 CRef<CBlastOptionsHandle> m_OptsHandle; 1521 /// Task specified in the command line 1522 string m_Task; 1523 /// Client ID used for remote BLAST submissions, must be populated by 1524 /// subclasses 1525 string m_ClientId; 1526 /// Is this application being run ungapped 1527 bool m_IsUngapped; 1528 1529 /// Create the options handle based on the command line arguments 1530 /// @param locality whether the search will be executed locally or remotely 1531 /// [in] 1532 /// @param args command line arguments [in] 1533 virtual CRef<CBlastOptionsHandle> 1534 x_CreateOptionsHandle(CBlastOptions::EAPILocality locality, 1535 const CArgs& args) = 0; 1536 1537 /** Creates the BLAST options handle based on the task argument 1538 * @param locality whether the search will be executed locally or remotely [in] 1539 * @param task program-specific BLAST named parameter set [in] 1540 */ 1541 CRef<CBlastOptionsHandle> 1542 x_CreateOptionsHandleWithTask(CBlastOptions::EAPILocality locality, 1543 const string& task); 1544 1545 /// Issue warnings when recovering from a search strategy (command line 1546 /// applications only) 1547 void x_IssueWarningsForIgnoredOptions(const CArgs& args); 1548 }; 1549 1550 /** 1551 * @brief Create a CArgDescriptions object and invoke SetArgumentDescriptions 1552 * for each of the TBlastCmdLineArgs in its argument list 1553 * 1554 * @param args arguments to configure the return value [in] 1555 * 1556 * @return a CArgDescriptions 
object with the command line options set 1557 */ 1558 NCBI_BLASTINPUT_EXPORT 1559 CArgDescriptions* 1560 SetUpCommandLineArguments(TBlastCmdLineArgs& args); 1561 1562 END_SCOPE(blast) 1563 END_NCBI_SCOPE 1564 1565 #endif /* ALGO_BLAST_BLASTINPUT___BLAST_ARGS__HPP */ 1566