1 /*  $Id: blast_args.hpp 631554 2021-05-19 13:52:23Z ivanov $
2  * ===========================================================================
3  *
4  *                            PUBLIC DOMAIN NOTICE
5  *               National Center for Biotechnology Information
6  *
7  *  This software/database is a "United States Government Work" under the
8  *  terms of the United States Copyright Act.  It was written as part of
9  *  the author's official duties as a United States Government employee and
10  *  thus cannot be copyrighted.  This software/database is freely available
11  *  to the public for use. The National Library of Medicine and the U.S.
12  *  Government have not placed any restriction on its use or reproduction.
13  *
14  *  Although all reasonable efforts have been taken to ensure the accuracy
15  *  and reliability of the software and data, the NLM and the U.S.
16  *  Government do not and cannot warrant the performance or results that
17  *  may be obtained by using this software or data. The NLM and the U.S.
18  *  Government disclaim all warranties, express or implied, including
19  *  warranties of performance, merchantability or fitness for any particular
20  *  purpose.
21  *
22  *  Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author:  Jason Papadopoulos
27  *
28  */
29 
30 /** @file blast_args.hpp
31  * Interface for converting blast-related command line
32  * arguments into blast options
33  */
34 
35 #ifndef ALGO_BLAST_BLASTINPUT___BLAST_ARGS__HPP
36 #define ALGO_BLAST_BLASTINPUT___BLAST_ARGS__HPP
37 
38 #include <corelib/ncbistd.hpp>
39 #include <corelib/ncbiargs.hpp>
40 #include <algo/blast/api/uniform_search.hpp>
41 #include <algo/blast/api/blast_options.hpp>
42 #include <algo/blast/api/blast_options_handle.hpp>
43 #include <algo/blast/igblast/igblast.hpp>
44 #include <algo/blast/api/setup_factory.hpp> // for CThreadable
45 #include <algo/blast/blastinput/cmdline_flags.hpp>
46 #include <algo/blast/blastinput/blast_input_aux.hpp>
47 
48 #include <objmgr/scope.hpp>     // for CScope
49 #include <objects/seqloc/Na_strand.hpp>
50 #include <objects/scoremat/PssmWithParameters.hpp>
51 
52 #include <util/compress/stream_util.hpp>
53 
54 BEGIN_NCBI_SCOPE
55 BEGIN_SCOPE(blast)
56 
57 /**
58  * BLAST Command line arguments design
59  * The idea is to have several small objects (subclasses of IBlastCmdLineArgs)
60  * which can do two things:
61  * 1) On creation, add flags/options/etc to a CArgs object
62  * 2) When passed in a CBlastOptions object, call the appropriate methods based
63  * on the CArgs options set when the NCBI application framework parsed the
64  * command line. If data collected by the small object (from the command line)
65  * cannot be applied to the CBlastOptions object, then it's provided to the
66  * application via some other interface methods.
67  *
68  * Each command line application will have its own argument class (e.g.:
69  * CPsiBlastAppArgs), which will contain several of the aforementioned small
70  * objects. It will create and hold a reference to a CArgs class as well as
71  * a CBlastOptionsHandle object, which will pass to each of its small objects
72  * aggregated as data members and then return it to the caller (application)
73  *
74  * Categories of data to extract from command line options
75  * 1) BLAST algorithm options
76  * 2) Input/Output files, and their modifiers (e.g.: believe query defline)
77  * 3) BLAST database information (names, limitations, num db seqs)
78  * 4) Formatting options (html, display formats, etc)
79 */
80 
81 /** Interface definition for a generic command line option for BLAST
82  */
83 class NCBI_BLASTINPUT_EXPORT IBlastCmdLineArgs : public CObject
84 {
85 public:
86     /** Our virtual destructor */
~IBlastCmdLineArgs()87     virtual ~IBlastCmdLineArgs() {}
88 
89     /** Sets the command line descriptions in the CArgDescriptions object
90      * relevant to the subclass
91      * @param arg_desc the argument descriptions object [in|out]
92      */
93     virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc) = 0;
94 
95     /** Extracts BLAST algorithmic options from the command line arguments into
96      * the CBlastOptions object. Default implementation does nothing.
97      * @param cmd_line_args Command line arguments parsed by the NCBI
98      * application framework [in]
99      * @param options object to which the appropriate options will be set
100      * [in|out]
101      */
102     virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
103                                          CBlastOptions& options);
104 };
105 
106 /** Argument class to retrieve input and output streams for a command line
107  * program.
108  */
109 class NCBI_BLASTINPUT_EXPORT CStdCmdLineArgs : public IBlastCmdLineArgs
110 {
111 public:
112     /** Default constructor */
CStdCmdLineArgs()113     CStdCmdLineArgs() : m_InputStream(0), m_OutputStream(0),
114                         m_GzipEnabled(false),
115                         m_SRAaccessionEnabled(false),
116                         m_UnalignedOutputStream(0) {};
117     /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
118     virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
119     /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
120     virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
121                                          CBlastOptions& options);
122     /** Get the input stream for a command line application */
123     CNcbiIstream& GetInputStream() const;
124     /** Get the output stream for a command line application */
125     CNcbiOstream& GetOutputStream() const;
126     /** Set the input stream if read from a saved search strategy */
127     void SetInputStream(CRef<CTmpFile> input_file);
128 
129     /** Set automatic decompression of the input file is file name is
130      * recognized
131      * @param g If true input file will be unzgipped if the file name ends with
132      *          ".gz" [in]
133      */
SetGzipEnabled(bool g)134     void SetGzipEnabled(bool g) {m_GzipEnabled = g;}
135 
136     /** enables sra accession flag
137      * @param g If true "-sra" will be added (not compatible with "-query")
138      */
SetSRAaccessionEnabled(bool g)139     void SetSRAaccessionEnabled(bool g) {m_SRAaccessionEnabled = g;}
140 
141     /** Is there a separate output stream for unaligned sequences/reads
142      *  (for magicblast)
143      *  @return True if separate output stream has been set up, otherwise false
144      */
HasUnalignedOutputStream(void) const145     bool HasUnalignedOutputStream(void) const {return m_UnalignedOutputStream;}
146 
147     /** Get output stream for unaligned sequences/reads (for magicblast)
148      *  @return Output stream for unaligned reads or NULL
149      */
GetUnalignedOutputStream() const150     CNcbiOstream* GetUnalignedOutputStream() const
151     {return m_UnalignedOutputStream;}
152 
153 private:
154     CNcbiIstream* m_InputStream;    ///< Application's input stream
155     CNcbiOstream* m_OutputStream;   ///< Application's output stream
156     auto_ptr<CDecompressIStream> m_DecompressIStream;
157     auto_ptr<CCompressOStream> m_CompressOStream;
158 
159     /// ASN.1 specification of query sequences when read from a saved search
160     /// strategy
161     CRef<CTmpFile> m_QueryTmpInputFile;
162 
163     /// If true input file will be decompressed with gzip if filename ends
164     /// with ".gz"
165     bool m_GzipEnabled;
166 
167     /// If true, option to specify SRA runs will be presented  as possible
168     /// query input
169     bool m_SRAaccessionEnabled;
170 
171     /// Output stream to report unaligned sequences/reads
172     CNcbiOstream* m_UnalignedOutputStream;
173     unique_ptr<CCompressOStream> m_UnalignedCompressOStream;
174 };
175 
176 /** Argument class to populate an application's name and description */
177 class NCBI_BLASTINPUT_EXPORT CProgramDescriptionArgs : public IBlastCmdLineArgs
178 {
179 public:
180     /**
181      * @brief Constructor
182      *
183      * @param program_name application's name [in]
184      * @param program_description application's description [in]
185      */
186     CProgramDescriptionArgs(const string& program_name,
187                             const string& program_description);
188     /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
189     virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
190 
191 protected:
192     string m_ProgName;  ///< Application's name
193     string m_ProgDesc;  ///< Application's description
194 };
195 
196 /// Argument class to specify the supported tasks a given program
197 class NCBI_BLASTINPUT_EXPORT CTaskCmdLineArgs : public IBlastCmdLineArgs
198 {
199 public:
200     /** Constructor
201      * @param supported_tasks list of supported tasks [in]
202      * @param default_task One of the tasks above, to be displayed as
203      * default in the command line arguments (cannot be empty or absent from
204      * the set above) [in]
205      */
206     CTaskCmdLineArgs(const set<string>& supported_tasks,
207                      const string& default_task);
208     /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
209     virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
210     /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
211     virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
212                                          CBlastOptions& options);
213 private:
214     /// Set of supported tasks by this command line argument
215     const set<string> m_SupportedTasks;
216     /// Default task for this command line argument
217     string m_DefaultTask;
218 };
219 
220 /** Argument class to retrieve and set the window size BLAST algorithm
221  * option */
222 class NCBI_BLASTINPUT_EXPORT CWindowSizeArg : public IBlastCmdLineArgs
223 {
224 public:
225     /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
226     virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
227     /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions
228      * @note this depends on the matrix already being set...
229      */
230     virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
231                                          CBlastOptions& options);
232 };
233 
234 /** Argument class to retrieve and set the off-diagonal range used in 2-hit
235     algorithm */
236 class NCBI_BLASTINPUT_EXPORT COffDiagonalRangeArg : public IBlastCmdLineArgs
237 {
238 public:
239     /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
240     virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
241     /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions
242      * @note this depends on the matrix already being set...
243      */
244     virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
245                                          CBlastOptions& options);
246 };
247 
248 /** Argument class to retrieve and set the word threshold BLAST algorithm
249  * option */
250 class NCBI_BLASTINPUT_EXPORT CWordThresholdArg : public IBlastCmdLineArgs
251 {
252 public:
253     /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
254     virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
255     /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions
256      * @note this depends on the matrix already being set...
257      */
258     virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
259                                          CBlastOptions& options);
260 };
261 
262 /** RMH: Argument class to retrieve and set the options specific to
263  *       the RMBlastN algorithm
264  */
265 class NCBI_BLASTINPUT_EXPORT CRMBlastNArg : public IBlastCmdLineArgs
266 {
267 public:
268     /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
269     virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
270     /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
271     virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
272                                          CBlastOptions& options);
273 };
274 
275 /** Argument class to retrieve and set the scoring matrix name BLAST algorithm
276  * option */
277 class NCBI_BLASTINPUT_EXPORT CMatrixNameArg : public IBlastCmdLineArgs
278 {
279 public:
280     /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
281     virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
282     /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
283     virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
284                                          CBlastOptions& options);
285 };
286 
287 /** Argument class for general search BLAST algorithm options: evalue, gap
288  * penalties, query filter string, ungapped x-drop, initial and final gapped
289  * x-drop, word size, percent identity, and effective search space.
290  */
291 class NCBI_BLASTINPUT_EXPORT CGenericSearchArgs : public IBlastCmdLineArgs
292 {
293 public:
294     /**
295      * @brief Constructor
296      *
297      * @param query_is_protein is the query sequence(s) protein? [in]
298      * @param is_rpsblast is it RPS-BLAST? [in]
299      * @param show_perc_identity should the percent identity be shown?
300      * @param is_igblast is it IG-BLAST? [in]
301      * Currently only supported for blastn [in]
302      */
CGenericSearchArgs(bool query_is_protein=true,bool is_rpsblast=false,bool show_perc_identity=false,bool is_tblastx=false,bool is_igblast=false,bool suppress_sum_stats=false)303     CGenericSearchArgs(bool query_is_protein = true, bool is_rpsblast = false,
304                        bool show_perc_identity = false, bool is_tblastx = false,
305                        bool is_igblast = false, bool suppress_sum_stats = false)
306         : m_QueryIsProtein(query_is_protein), m_IsRpsBlast(is_rpsblast),
307           m_ShowPercentIdentity(show_perc_identity), m_IsTblastx(is_tblastx),
308           m_IsIgBlast(is_igblast), m_SuppressSumStats(suppress_sum_stats) {}
309 
310     /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
311     virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
312     /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
313     virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
314                                          CBlastOptions& options);
315 private:
316     bool m_QueryIsProtein;  /**< true if the query is protein */
317     bool m_IsRpsBlast;      /**< true if the search is RPS-BLAST */
318     bool m_ShowPercentIdentity; /**< true if the percent identity option should
319                                  be shown */
320     bool m_IsTblastx; /**< true if the search is tblastx */
321     bool m_IsIgBlast; /**< true if the search is igblast */
322     bool m_SuppressSumStats; /**< true if search is blastn or blastp */
323 };
324 
325 /** Argument class for collecting filtering options */
326 class NCBI_BLASTINPUT_EXPORT CFilteringArgs : public IBlastCmdLineArgs
327 {
328 public:
329     /**
330      * @brief Constructor
331      *
332      * @param query_is_protein is the query sequence(s) protein? [in]
333      * @param filter_by_default should filtering be applied by default? [in]
334      */
CFilteringArgs(bool query_is_protein=true,bool filter_by_default=true)335     CFilteringArgs(bool query_is_protein = true,
336                    bool filter_by_default = true)
337         : m_QueryIsProtein(query_is_protein),
338           m_FilterByDefault(filter_by_default) {}
339 
340     /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
341     virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
342     /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
343     virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
344                                          CBlastOptions& options);
345 private:
346     bool m_QueryIsProtein;  /**< true if the query is protein */
347     bool m_FilterByDefault; /**< Should filtering be applied by default? */
348 
349     /**
350      * @brief Auxiliary method to tokenize the filtering string.
351      *
352      * @param filtering_args string to tokenize [in]
353      * @param output vector with tokens [in|out]
354      */
355     void x_TokenizeFilteringArgs(const string& filtering_args,
356                                  vector<string>& output) const;
357 };
358 
359 /// Defines values for match and mismatch in nucleotide comparisons as well as
360 /// non-greedy extension
361 class NCBI_BLASTINPUT_EXPORT CNuclArgs : public IBlastCmdLineArgs
362 {
363 public:
364     /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
365     virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
366     /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
367     virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
368                                          CBlastOptions& options);
369 };
370 
371 /// Argument class to retrieve discontiguous megablast arguments
372 class NCBI_BLASTINPUT_EXPORT CDiscontiguousMegablastArgs : public IBlastCmdLineArgs
373 {
374 public:
375     /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
376     virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
377     /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
378     virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
379                                          CBlastOptions& options);
380 
381     /// Value to specify coding template type
382     static const string kTemplType_Coding;
383     /// Value to specify optimal template type
384     static const string kTemplType_Optimal;
385     /// Value to specify coding+optimal template type
386     static const string kTemplType_CodingAndOptimal;
387 };
388 
389 /** Argument class for collecting composition based statistics options */
390 class NCBI_BLASTINPUT_EXPORT CCompositionBasedStatsArgs : public IBlastCmdLineArgs
391 {
392 public:
393     /// Constructor
394     ///@param is_2and3supported Are composition based statistics options 2 and
395     /// 3 supported [in]
396     ///@param default_option Default composition based satatistics option [in]
397     ///@param zero_option_descr Non-standard description for composition
398     /// based statistics option zero [in]
CCompositionBasedStatsArgs(bool is_2and3supported=true,const string & default_option=kDfltArgCompBasedStats,const string & zero_option_descr="")399     CCompositionBasedStatsArgs(bool is_2and3supported = true,
400                                const string& default_option
401                                = kDfltArgCompBasedStats,
402                                const string& zero_option_descr = "")
403         : m_Is2and3Supported(is_2and3supported),
404           m_DefaultOpt(default_option),
405           m_ZeroOptDescr(zero_option_descr) {}
406 
407     /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
408     virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
409     /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
410     virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
411                                          CBlastOptions& options);
412 
413 protected:
414     /// Are options 2 and 3 supported
415     bool m_Is2and3Supported;
416     /// Default option
417     string m_DefaultOpt;
418     /// Non standard description for option zero
419     string m_ZeroOptDescr;
420 };
421 
422 /** Argument class for collecting gapped options */
423 class NCBI_BLASTINPUT_EXPORT CGappedArgs : public IBlastCmdLineArgs
424 {
425 public:
426     /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
427     virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
428     /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
429     virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
430                                          CBlastOptions& options);
431 };
432 
433 /** Argument class for collecting the largest intron size */
434 class NCBI_BLASTINPUT_EXPORT CLargestIntronSizeArgs : public IBlastCmdLineArgs
435 {
436 public:
437     /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
438     virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
439     /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
440     virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
441                                          CBlastOptions& options);
442 };
443 
444 /// Argument class to collect the frame shift penalty for out-of-frame searches
445 class NCBI_BLASTINPUT_EXPORT CFrameShiftArgs : public IBlastCmdLineArgs
446 {
447 public:
448     /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
449     virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
450     /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
451     virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
452                                          CBlastOptions& options);
453 };
454 
455 /// Argument class to collect the genetic code for all queries/subjects
456 class NCBI_BLASTINPUT_EXPORT CGeneticCodeArgs : public IBlastCmdLineArgs
457 {
458 public:
459     /// Enumeration defining which sequences the genetic code applies to
460     enum ETarget {
461         eQuery,         ///< Query genetic code
462         eDatabase       ///< Database genetic code
463     };
464 
465 
466     /**
467      * @brief Constructor
468      *
469      * @param t genetic code target (query or database)
470      */
CGeneticCodeArgs(ETarget t)471     CGeneticCodeArgs(ETarget t) : m_Target(t) {};
472 
473     /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
474     virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
475     /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
476     virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
477                                          CBlastOptions& options);
478 
479 private:
480     ETarget m_Target; ///< Genetic code target
481 };
482 
483 /// Argument class to retrieve the gap trigger option
484 class NCBI_BLASTINPUT_EXPORT CGapTriggerArgs : public IBlastCmdLineArgs
485 {
486 public:
487     /**
488      * @brief Constructor
489      *
490      * @param query_is_protein is the query sequence(s) protein?
491      */
CGapTriggerArgs(bool query_is_protein)492     CGapTriggerArgs(bool query_is_protein)
493         : m_QueryIsProtein(query_is_protein) {}
494     /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
495     virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
496     /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
497     virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
498                                          CBlastOptions& options);
499 private:
500     bool m_QueryIsProtein;  /**< true if the query is protein */
501 };
502 
503 /// Argument class to collect PSSM engine options
504 class NCBI_BLASTINPUT_EXPORT CPssmEngineArgs : public IBlastCmdLineArgs
505 {
506 public:
507     /// Constructor
508     /// @param is_deltablast Are the aruments set up for Delta Blast [in]
CPssmEngineArgs(bool is_deltablast=false)509     CPssmEngineArgs(bool is_deltablast = false) : m_IsDeltaBlast(is_deltablast)
510     {}
511 
512     /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
513     virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
514     /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
515     virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
516                                          CBlastOptions& options);
517 
518 private:
519     /// Are these arumnets for Delta Blast
520     bool m_IsDeltaBlast;
521 };
522 
523 /// Argument class to import/export the search strategy
524 class NCBI_BLASTINPUT_EXPORT CSearchStrategyArgs : public IBlastCmdLineArgs
525 {
526 public:
527     /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
528     virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
529     /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
530     virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
531                                          CBlastOptions& options);
532 
533     /// Get the input stream for the search strategy
534     CNcbiIstream* GetImportStream(const CArgs& args) const;
535     /// Get the output stream for the search strategy
536     CNcbiOstream* GetExportStream(const CArgs& args) const;
537 };
538 
539 /// Argument class to collect options specific to PSI-BLAST
540 class NCBI_BLASTINPUT_EXPORT CPsiBlastArgs : public IBlastCmdLineArgs
541 {
542 public:
543     /// Enumeration to determine the molecule type of the database
544     enum ETargetDatabase {
545         eProteinDb,         ///< Traditional, iterated PSI-BLAST
546         eNucleotideDb       ///< PSI-Tblastn, non-iterated
547     };
548 
549     /**
550      * @brief Constructor
551      *
552      * @param db_target Molecule type of the database
553      * @param is_deltablast Are the aruments set up for Delta Blast
554      */
CPsiBlastArgs(ETargetDatabase db_target=eProteinDb,bool is_deltablast=false)555     CPsiBlastArgs(ETargetDatabase db_target = eProteinDb,
556                   bool is_deltablast = false)
557         : m_DbTarget(db_target), m_NumIterations(1),
558           m_CheckPointOutput(0), m_AsciiMatrixOutput(0),
559           m_IsDeltaBlast(is_deltablast),
560           m_SaveLastPssm(false)
561     {};
562 
563     /// Our virtual destructor
~CPsiBlastArgs()564     virtual ~CPsiBlastArgs() {}
565 
566     /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
567     virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
568     /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
569     virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
570                                          CBlastOptions& options);
571 
572     /// Retrieve the number of iterations to perform
GetNumberOfIterations() const573     size_t GetNumberOfIterations() const {
574         return m_NumIterations;
575     }
576 
577     /// Retrieve the number of iterations to perform
SetNumberOfIterations(unsigned int num_iters)578     void SetNumberOfIterations(unsigned int num_iters) {
579             m_NumIterations = num_iters;
580     }
581     /// Returns true if checkpoint PSSM is required to be printed
RequiresCheckPointOutput() const582     bool RequiresCheckPointOutput() const {
583         return m_CheckPointOutput != NULL;
584     }
585     /// Get the checkpoint file output stream
586     /// @return pointer to output stream, not to be free'd by the caller
GetCheckPointOutputStream()587     CNcbiOstream* GetCheckPointOutputStream() {
588         return m_CheckPointOutput ? m_CheckPointOutput->GetStream() : NULL;
589     }
590     /// Returns true if ASCII PSSM is required to be printed
RequiresAsciiPssmOutput() const591     bool RequiresAsciiPssmOutput() const {
592         return m_AsciiMatrixOutput != NULL;
593     }
594     /// Get the ASCII matrix output stream
595     /// @return pointer to output stream, not to be free'd by the caller
GetAsciiMatrixOutputStream()596     CNcbiOstream* GetAsciiMatrixOutputStream() {
597         return m_AsciiMatrixOutput ? m_AsciiMatrixOutput->GetStream() : NULL;
598     }
599 
600     /// Get the PSSM read from checkpoint file
GetInputPssm() const601     CRef<objects::CPssmWithParameters> GetInputPssm() const {
602         return m_Pssm;
603     }
604 
605     /// Set the PSSM read from saved search strategy
SetInputPssm(CRef<objects::CPssmWithParameters> pssm)606     void SetInputPssm(CRef<objects::CPssmWithParameters> pssm) {
607         m_Pssm = pssm;
608     }
609 
610     /// Should the PSSM after the last database search be saved
GetSaveLastPssm(void) const611     bool GetSaveLastPssm(void) const {
612         return m_SaveLastPssm;
613     }
614 
615     /// Set the on/off switch for saving PSSM after the last database search
SetSaveLastPssm(bool b)616     void SetSaveLastPssm(bool b) {
617         m_SaveLastPssm = b;
618     }
619 
620 private:
621     /// Molecule of the database
622     ETargetDatabase m_DbTarget;
623     /// number of iterations to perform
624     size_t m_NumIterations;
625     /// checkpoint output file
626     CRef<CAutoOutputFileReset> m_CheckPointOutput;
627     /// ASCII matrix output file
628     CRef<CAutoOutputFileReset> m_AsciiMatrixOutput;
629     /// PSSM
630     CRef<objects::CPssmWithParameters> m_Pssm;
631 
632     /// Are the aruments set up for Delta Blast
633     bool m_IsDeltaBlast;
634 
635     /// Save PSSM after the last database search
636     bool m_SaveLastPssm;
637 
638     /// Prohibit copy constructor
639     CPsiBlastArgs(const CPsiBlastArgs& rhs);
640     /// Prohibit assignment operator
641     CPsiBlastArgs& operator=(const CPsiBlastArgs& rhs);
642 
643     /// Auxiliary function to create a PSSM from a multiple sequence alignment
644     /// file
645     CRef<objects::CPssmWithParameters>
646     x_CreatePssmFromMsa(CNcbiIstream& input_stream, CBlastOptions& opt,
647                         bool save_ascii_pssm, unsigned int msa_master_idx,
648                         bool ignore_pssm_tmpl_seq);
649 };
650 
651 /// Argument class to collect options specific to PHI-BLAST
652 class NCBI_BLASTINPUT_EXPORT CPhiBlastArgs : public IBlastCmdLineArgs
653 {
654 public:
655     /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
656     virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
657     /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
658     virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
659                                          CBlastOptions& options);
660 };
661 
662 /// Argument class to collect options specific to KBLASTP
663 class NCBI_BLASTINPUT_EXPORT CKBlastpArgs : public IBlastCmdLineArgs
664 {
665 public:
666 
667     /// Constructor
CKBlastpArgs(void)668     CKBlastpArgs(void) : m_JDistance(0.10), m_MinHits(0), m_CandidateSeqs(1000) {}
669 
670     /// Our virtual destructor
~CKBlastpArgs()671     virtual ~CKBlastpArgs() {}
672 
673     /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
674     virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
675     /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
676     virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
677                                          CBlastOptions& options);
678 
679 
680     /// Get the Jaccard distance
GetJaccardDistance(void)681     double GetJaccardDistance(void) { return m_JDistance;}
682 
683     /// Get the minimum number of LSH matches.
GetMinHits(void)684     int GetMinHits(void) {return m_MinHits;}
685 
686     /// The database
GetDatabase(void)687     string GetDatabase(void) {return m_DbIndex;}
688 
689     /// Number of candidate sequences to attempt with BLASTP
GetCandidateSeqs(void)690     int GetCandidateSeqs(void) {return m_CandidateSeqs;}
691 
692 private:
693     /// Prohibit copy constructor
694     CKBlastpArgs(const CKBlastpArgs& rhs);
695     /// Prohibit assignment operator
696     CKBlastpArgs& operator=(const CKBlastpArgs& rhs);
697 
698     /// Jaccard distance
699     double m_JDistance;
700 
701     /// Minimum number of hits in LSH phase
702     int m_MinHits;
703 
704     /// Database/index
705     string m_DbIndex;
706 
707     /// Number of candidate sequences to try BLAST on.
708     int m_CandidateSeqs;
709 };
710 
711 /// Argument class to collect options specific to DELTA-BLAST
712 class NCBI_BLASTINPUT_EXPORT CDeltaBlastArgs : public IBlastCmdLineArgs
713 {
714 public:
715 
716     /// Constructor
CDeltaBlastArgs(void)717     CDeltaBlastArgs(void) : m_ShowDomainHits(false) {}
718 
719     /// Our virtual destructor
~CDeltaBlastArgs()720     virtual ~CDeltaBlastArgs() {}
721 
722     /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
723     virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
724     /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
725     virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
726                                          CBlastOptions& options);
727 
728     /// Get domain database
GetDomainDatabase(void)729     CRef<CSearchDatabase> GetDomainDatabase(void)
730     {return m_DomainDb;}
731 
732     /// Get show domain hits option value
GetShowDomainHits(void) const733     bool GetShowDomainHits(void) const {return m_ShowDomainHits;}
734 
735 private:
736     /// Prohibit copy constructor
737     CDeltaBlastArgs(const CDeltaBlastArgs& rhs);
738     /// Prohibit assignment operator
739     CDeltaBlastArgs& operator=(const CDeltaBlastArgs& rhs);
740 
741 private:
742 
743     /// Conserved Domain Database
744     CRef<CSearchDatabase> m_DomainDb;
745 
746     /// Is printing CDD hits requested
747     bool m_ShowDomainHits;
748 };
749 
750 
751 class NCBI_BLASTINPUT_EXPORT CMappingArgs : public IBlastCmdLineArgs
752 {
753 public:
CMappingArgs(void)754     CMappingArgs(void) {}
755 
756     /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
757     virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
758     /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
759     virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
760                                          CBlastOptions& options);
761 
762 };
763 
764 /*****************************************************************************/
765 // Input options
766 
767 /// Argument class to collect query options
768 class NCBI_BLASTINPUT_EXPORT CQueryOptionsArgs : public IBlastCmdLineArgs
769 {
770 public:
771     /**
772      * @brief Constructor
773      *
774      * @param query_cannot_be_nucl can the query not be nucleotide?
775      */
CQueryOptionsArgs(bool query_cannot_be_nucl=false)776     CQueryOptionsArgs(bool query_cannot_be_nucl = false)
777         : m_Strand(objects::eNa_strand_unknown), m_Range(),
778         m_UseLCaseMask(kDfltArgUseLCaseMasking),
779         m_ParseDeflines(kDfltArgParseDeflines),
780         m_QueryCannotBeNucl(query_cannot_be_nucl)
781     {};
782 
783     /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
784     virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
785     /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
786     virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
787                                          CBlastOptions& options);
788 
789     /// Get query sequence range restriction
GetRange() const790     TSeqRange GetRange() const { return m_Range; }
791     /// Set query sequence range restriction
SetRange(const TSeqRange & range)792     void SetRange(const TSeqRange& range) { m_Range = range; }
793     /// Get strand to search in query sequence(s)
GetStrand() const794     objects::ENa_strand GetStrand() const { return m_Strand; }
795     /// Use lowercase masking in FASTA input?
UseLowercaseMasks() const796     bool UseLowercaseMasks() const { return m_UseLCaseMask; }
797     /// Should the defline be parsed?
GetParseDeflines() const798     bool GetParseDeflines() const { return m_ParseDeflines; }
799 
800     /// Is the query sequence protein?
QueryIsProtein() const801     bool QueryIsProtein() const { return m_QueryCannotBeNucl; }
802 
803 private:
804     /// Strand(s) to search
805     objects::ENa_strand m_Strand;
806     /// range to restrict the query sequence(s)
807     TSeqRange m_Range;
808     /// use lowercase masking in FASTA input
809     bool m_UseLCaseMask;
810     /// Should the deflines be parsed?
811     bool m_ParseDeflines;
812 
813     /// only false for blast[xn], and tblastx
814     /// true in case of PSI-BLAST
815     bool m_QueryCannotBeNucl;
816 };
817 
818 /// Argument class to collect query options for BLAST Mapper
819 class NCBI_BLASTINPUT_EXPORT CMapperQueryOptionsArgs : public CQueryOptionsArgs
820 {
821 public:
822 
823     /// Input formats
824     enum EInputFormat {
825         eFasta = 0,
826         eFastc,
827         eFastq,
828         eASN1text,
829         eASN1bin,
830         eSra
831     };
832 
833 
CMapperQueryOptionsArgs(void)834     CMapperQueryOptionsArgs(void)
835         : CQueryOptionsArgs(false),
836           m_IsPaired(false),
837           m_InputFormat(eFasta),
838           m_MateInputStream(NULL),
839           m_EnableSraCache(false)
840     {}
841 
842     /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
843     virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
844     /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
845     virtual void ExtractAlgorithmOptions(const CArgs& args, CBlastOptions& opt);
846 
847     /// Are query sequences paired
IsPaired(void) const848     bool IsPaired(void) const {return m_IsPaired;}
849 
850     /// Are queries provided in Fastc format
GetInputFormat(void) const851     EInputFormat GetInputFormat(void) const
852     {return m_InputFormat;}
853 
854     /// Does the mate input stream exits
HasMateInputStream(void) const855     bool HasMateInputStream(void) const {return m_MateInputStream;}
856 
857     /// Get input stream for query mates
GetMateInputStream(void) const858     CNcbiIstream* GetMateInputStream(void) const {return m_MateInputStream;}
859 
860     /// Get a list of SRA accessions
GetSraAccessions(void) const861     const vector<string>& GetSraAccessions(void) const
862     {return m_SraAccessions;}
863 
864     /// Is SRA caching in local files enabled
865     /// (see File Caching at
866     /// https://github.com/ncbi/sra-tools/wiki/Toolkit-Configuration)
IsSraCacheEnabled(void) const867     bool IsSraCacheEnabled(void) const {return m_EnableSraCache;}
868 
869 private:
870     bool m_IsPaired;
871     EInputFormat m_InputFormat;
872     vector<string> m_SraAccessions;
873 
874     CNcbiIstream* m_MateInputStream;
875     auto_ptr<CDecompressIStream> m_DecompressIStream;
876 
877     bool m_EnableSraCache;
878 };
879 
880 
881 /// Argument class to collect database/subject arguments
882 class NCBI_BLASTINPUT_EXPORT CBlastDatabaseArgs : public IBlastCmdLineArgs
883 {
884 public:
885     /// The default priority for subjects, should be used for
886     /// subjects/databases
887     static const int kSubjectsDataLoaderPriority = 10;
888 
889     /// alias for the database molecule type
890     typedef CSearchDatabase::EMoleculeType EMoleculeType;
891 
892     /// Auxiliary function to determine if the database/subject sequence has
893     /// been set
894     static bool HasBeenSet(const CArgs& args);
895 
896     /// Constructor
897     /// @param request_mol_type If true, the command line arguments will
898     /// include a mandatory option to disambiguate whether a protein or a
899     /// nucleotide database is searched
900     /// @param is_rpsblast is it RPS-BLAST?
901     /// @param is_igblast is it IG-BLAST?
902     /// @param is_deltablast is it DELTA-BLAST?
903     CBlastDatabaseArgs(bool request_mol_type = false,
904                        bool is_rpsblast = false,
905                        bool is_igblast = false,
906                        bool is_mapper = false,
907                        bool is_kblast = false);
908     /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
909     virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
910     /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
911     virtual void ExtractAlgorithmOptions(const CArgs& args,
912                                          CBlastOptions& opts);
913 
914     /// Turns on/off database masking support
SetDatabaseMaskingSupport(bool val)915     void SetDatabaseMaskingSupport(bool val) {
916         m_SupportsDatabaseMasking = val;
917     }
918 
919     /// Is the database/subject protein?
IsProtein() const920     bool IsProtein() const { return m_IsProtein; }
921 
922     /// Get the BLAST database name
923     /// @return empty string in the case of BLAST2Sequences, otherwise the
924     /// BLAST database name
GetDatabaseName() const925     string GetDatabaseName() const {
926         return m_SearchDb.Empty() ? kEmptyStr : m_SearchDb->GetDatabaseName();
927     }
928 
929     /// Retrieve the search database information
GetSearchDatabase() const930     CRef<CSearchDatabase> GetSearchDatabase() const { return m_SearchDb; }
931     /// Set the search database information.
932     /// use case: recovering from search strategy
SetSearchDatabase(CRef<CSearchDatabase> search_db)933     void SetSearchDatabase(CRef<CSearchDatabase> search_db) {
934         m_SearchDb = search_db;
935         m_IsProtein = search_db->IsProtein();
936     }
937 
938     /// Sets the subject sequences.
939     /// use case: recovering from search strategy
SetSubjects(CRef<IQueryFactory> subjects,CRef<CScope> scope,bool is_protein)940     void SetSubjects(CRef<IQueryFactory> subjects, CRef<CScope> scope,
941                      bool is_protein) {
942         m_Subjects = subjects;
943         m_Scope = scope;
944         m_IsProtein = is_protein;
945     }
946 
947     /// Retrieve subject sequences, if provided
948     /// @param scope scope to which to sequence read will be added (if
949     /// non-NULL) [in]
950     /// @return empty CRef<> if no subjects were provided, otherwise a properly
951     /// initialized IQueryFactory object
GetSubjects(objects::CScope * scope=NULL)952     CRef<IQueryFactory> GetSubjects(objects::CScope* scope = NULL) {
953         if (m_Subjects && scope) {
954             // m_Scope contains the subject(s) read
955             _ASSERT(m_Scope.NotEmpty());
956             // Add the scope with a lower priority to avoid conflicts
957             scope->AddScope(*m_Scope, kSubjectsDataLoaderPriority);
958         }
959         return m_Subjects;
960     }
961 
SetIPGFilteringSupport(bool val)962     void SetIPGFilteringSupport(bool val) {
963         m_SupportIPGFiltering = val;
964     }
965 
966 protected:
967     CRef<CSearchDatabase> m_SearchDb;/**< Description of the BLAST database */
968     bool m_RequestMoleculeType;     /**< Determines whether the database's
969                                       molecule type should be requested in the
970                                       command line, true in case of PSI-BLAST
971                                       */
972     bool m_IsRpsBlast;              /**< true if the search is RPS-BLAST */
973     bool m_IsIgBlast;               /**< true if the search is Ig-BLAST */
974 
975     bool m_IsProtein;               /**< Is the database/subject(s) protein? */
976     bool m_IsMapper;                /**< true for short read mapper */
977     bool m_IsKBlast;                /**< true for Kblastp */
978     CRef<IQueryFactory> m_Subjects; /**< The subject sequences */
979     CRef<objects::CScope> m_Scope;  /**< CScope object in which all subject
980                                       sequences read are kept */
981     bool m_SupportsDatabaseMasking; /**< true if it's supported */
982     bool m_SupportIPGFiltering;     /**< true if IPG filtering is supported */
983 };
984 
985 /// Argument class to collect options specific to igBLAST
986 class NCBI_BLASTINPUT_EXPORT CIgBlastArgs : public IBlastCmdLineArgs
987 {
988 public:
CIgBlastArgs(bool is_protein)989     CIgBlastArgs(bool is_protein) : m_IsProtein(is_protein) {};
990 
991     /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
992     virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
993     /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
994     virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
995                                          CBlastOptions& options);
996 
GetIgBlastOptions()997     CRef<CIgBlastOptions> GetIgBlastOptions() { return m_IgOptions; }
998 
AddIgSequenceScope(CRef<objects::CScope> scope)999     void AddIgSequenceScope(CRef<objects::CScope> scope) {
1000 
1001         if (m_Scope.NotEmpty()) {
1002             // Add the scope with a lower priority to avoid conflicts
1003             scope->AddScope(*m_Scope,
1004                   CBlastDatabaseArgs::kSubjectsDataLoaderPriority);
1005         }
1006     }
1007 
1008 private:
1009     /// Is this a protein search?
1010     bool m_IsProtein;
1011     /// Igblast options to fill
1012     CRef<CIgBlastOptions> m_IgOptions;
1013     /// scope to get sequences
1014     CRef<objects::CScope> m_Scope;
1015 };
1016 
1017 /// Argument class to collect formatting options, use this to create a
1018 /// CBlastFormat object.
1019 /// @note This object is also needed to set the maximum number of target
1020 /// sequences to save (hitlist size)
1021 class NCBI_BLASTINPUT_EXPORT CFormattingArgs : public IBlastCmdLineArgs
1022 {
1023 public:
1024     /// Defines the output formats supported by our command line formatter
1025     enum EOutputFormat {
1026         /// Standard pairwise alignments
1027         ePairwise = 0,
1028         ///< Query anchored showing identities
1029         eQueryAnchoredIdentities,
1030         ///< Query anchored no identities
1031         eQueryAnchoredNoIdentities,
1032         ///< Flat query anchored showing identities
1033         eFlatQueryAnchoredIdentities,
1034         ///< Flat query anchored no identities
1035         eFlatQueryAnchoredNoIdentities,
1036         /// XML output
1037         eXml,
1038         /// Tabular output
1039         eTabular,
1040         /// Tabular output with comments
1041         eTabularWithComments,
1042         /// ASN.1 text output
1043         eAsnText,
1044         /// ASN.1 binary output
1045         eAsnBinary,
1046         /// Comma-separated values
1047         eCommaSeparatedValues,
1048         /// BLAST archive format
1049         eArchiveFormat,
1050         /// JSON seq-align
1051         eJsonSeqalign,
1052         /// JSON XInclude
1053         eJson,
1054         /// XML2 XInclude
1055         eXml2,
1056         /// JSON2 single file
1057         eJson_S,
1058         /// XML2 single file
1059         eXml2_S,
1060         /// SAM format
1061         eSAM,
1062 
1063         eTaxFormat,
1064 
1065         ///igblast AIRR rearrangement, 19
1066         eAirrRearrangement,
1067 
1068         /// unaligned reads in magicblast
1069         eFasta,
1070         /// Sentinel value for error checking
1071         eEndValue
1072 
1073     };
1074 
1075     enum EFormatFlags {
1076     	eDefaultFlag = 0,
1077     	// Set if VDB
1078     	eIsVDB = 0x01,
1079     	// Set if SAM format is supported
1080     	eIsSAM = 0x02,
1081     	// Set if both VDB and SAM is true
1082     	eIsVDB_SAM = eIsVDB | eIsSAM,
1083         //Is eAirrRearrangement format supported?
1084         eIsAirrRearrangement = 0x04
1085     };
1086     /// Default constructor
CFormattingArgs(bool isIgblast=false,EFormatFlags flag=eDefaultFlag)1087     CFormattingArgs(bool isIgblast = false, EFormatFlags flag = eDefaultFlag)
1088         : m_OutputFormat(ePairwise), m_ShowGis(false),
1089         m_NumDescriptions(0), m_NumAlignments(0),
1090         m_DfltNumDescriptions(0), m_DfltNumAlignments(0),
1091         m_Html(false),
1092         m_IsIgBlast(isIgblast),
1093         m_LineLength(align_format::kDfltLineLength),
1094         m_FormatFlags(flag),
1095         m_HitsSortOption(-1),
1096         m_HspsSortOption(-1)
1097     {
1098         if (m_IsIgBlast) {
1099             m_DfltNumAlignments = m_DfltNumDescriptions = 10;
1100         } else {
1101             m_DfltNumAlignments = align_format::kDfltArgNumAlignments;
1102             m_DfltNumDescriptions = align_format::kDfltArgNumDescriptions;
1103         }
1104     };
1105 
1106     /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
1107     virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
1108     /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
1109     virtual void ExtractAlgorithmOptions(const CArgs& args,
1110                                          CBlastOptions& opts);
1111 
1112     /// Parses the output format command line option value, returns the
1113     /// requested output format type and any custom output formats (if
1114     /// any and applicable)
1115     /// @param args Command line arguments object [in]
1116     /// @param fmt_type Output format type requested in command line options
1117     /// [out]
1118     /// @param custom_fmt_spec Custom output format specification in command
1119     /// line options [out]
1120     virtual void
1121     ParseFormattingString(const CArgs& args,
1122                           EOutputFormat& fmt_type,
1123                           string& custom_fmt_spec,
1124                           string& custom_delim) const;
1125 
1126     /// Get the choice of formatted output
GetFormattedOutputChoice() const1127     EOutputFormat GetFormattedOutputChoice() const {
1128         return m_OutputFormat;
1129     }
1130 
1131     /// Returns true if the desired output format is structured (needed to
1132     /// determine whether to print or not that a PSI-BLAST search has
1133     /// converged - this is not supported in structured formats)
HasStructuredOutputFormat() const1134     bool HasStructuredOutputFormat() const {
1135         return m_OutputFormat == eXml ||
1136             m_OutputFormat == eAsnText ||
1137             m_OutputFormat == eAsnBinary ||
1138             m_OutputFormat == eXml2 ||
1139             m_OutputFormat == eJson ||
1140             m_OutputFormat == eXml2_S ||
1141             m_OutputFormat == eJson_S ||
1142             m_OutputFormat == eJsonSeqalign ||
1143             m_OutputFormat == eSAM;
1144     }
1145 
1146     /// Display the NCBI GIs in formatted output?
ShowGis() const1147     bool ShowGis() const {
1148         return m_ShowGis;
1149     }
1150     /// Number of one-line descriptions to show in traditional BLAST output
GetNumDescriptions() const1151     TSeqPos GetNumDescriptions() const {
1152         return m_NumDescriptions;
1153     }
1154     /// Number of alignments to show in traditional BLAST output
GetNumAlignments() const1155     TSeqPos GetNumAlignments() const {
1156         return m_NumAlignments;
1157     }
1158     /// Display HTML output?
DisplayHtmlOutput() const1159     bool DisplayHtmlOutput() const {
1160         return m_Html;
1161     }
1162 
1163     /// Retrieve for string that specifies the custom output format for tabular
1164     /// and comma-separated value
GetCustomOutputFormatSpec() const1165     string GetCustomOutputFormatSpec() const {
1166         return m_CustomOutputFormatSpec;
1167     }
1168 
1169     virtual bool ArchiveFormatRequested(const CArgs& args) const;
1170 
GetLineLength() const1171     size_t GetLineLength() const {
1172     	return m_LineLength;
1173     }
GetHitsSortOption() const1174     int GetHitsSortOption() const {
1175         return m_HitsSortOption;
1176     }
GetHspsSortOption() const1177     int GetHspsSortOption() const {
1178         return m_HspsSortOption;
1179     }
GetCustomDelimiter()1180     string GetCustomDelimiter(){return m_CustomDelim;}
1181 
1182 protected:
1183     EOutputFormat m_OutputFormat;   ///< Choice of formatting output
1184     bool m_ShowGis;                 ///< Display NCBI GIs?
1185     TSeqPos m_NumDescriptions;      ///< Number of 1-line descr. to show
1186     TSeqPos m_NumAlignments;        ///< Number of alignments to show
1187     TSeqPos m_DfltNumDescriptions;  ///< Default value for num descriptions
1188     TSeqPos m_DfltNumAlignments;    ///< Default value for num alignments
1189     bool m_Html;                    ///< Display HTML output?
1190     bool m_IsIgBlast;               ///< IgBlast has a different default num_alignments
1191     /// The format specification for custom output, e.g.: tabular or
1192     /// comma-separated value (populated if applicable)
1193     string m_CustomOutputFormatSpec;
1194     size_t m_LineLength;
1195     EFormatFlags m_FormatFlags;
1196     int m_HitsSortOption;
1197     int m_HspsSortOption;
1198     string m_CustomDelim;
1199 };
1200 
1201 /// Formatting args for magicblast advertising only SAM and fast tabular
1202 /// formats
1203 class NCBI_BLASTINPUT_EXPORT CMapperFormattingArgs : public CFormattingArgs
1204 {
1205 public:
1206 
CMapperFormattingArgs(void)1207     CMapperFormattingArgs(void) :
1208         CFormattingArgs(),
1209         m_TrimReadIds(true),
1210         m_PrintUnaligned(true),
1211         m_NoDiscordant(false),
1212         m_FwdRev(false),
1213         m_RevFwd(false),
1214         m_FwdOnly(false),
1215         m_RevOnly(false),
1216         m_OnlyStrandSpecific(false),
1217         m_PrintMdTag(false),
1218         m_UnalignedOutputFormat(eSAM)
1219     {}
1220 
1221     virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
1222 
1223     virtual void ExtractAlgorithmOptions(const CArgs& args, CBlastOptions& opt);
1224 
ArchiveFormatRequested(const CArgs &) const1225     virtual bool ArchiveFormatRequested(const CArgs& /*args*/) const {
1226         return false;
1227     }
1228 
1229     /// Should read ids be in SAM format be trimmed of .1 and .2 endings
1230     /// for paired mapping
TrimReadIds(void) const1231     bool TrimReadIds(void) const {return m_TrimReadIds;}
1232 
1233     /// Should unaligned reads be reported
PrintUnaligned(void) const1234     bool PrintUnaligned(void) const {return m_PrintUnaligned;}
1235 
1236     /// Should non-concordant pairs be filtered out of report
NoDiscordant(void) const1237     bool NoDiscordant(void) const {return m_NoDiscordant;}
1238 
1239     /// Specify fwd/ref strands
SelectFwdRev(void) const1240     bool SelectFwdRev(void) const {return m_FwdRev;}
1241 
1242     /// Specify rev/fwd strands
SelectRevFwd(void) const1243     bool SelectRevFwd(void) const {return m_RevFwd;}
1244 
1245     /// Specify fwd-only strands
SelectFwdOnly(void) const1246     bool SelectFwdOnly(void) const {return m_FwdOnly;}
1247 
1248     /// Specify rev-only strands
SelectRevOnly(void) const1249     bool SelectRevOnly(void) const {return m_RevOnly;}
1250 
1251     /// Specify only-strand-specific
SelectOnlyStrandSpecific(void) const1252     bool SelectOnlyStrandSpecific(void) const {return m_OnlyStrandSpecific;}
1253 
1254     /// Should MD tag be included in SAM report
PrintMdTag(void) const1255     bool PrintMdTag(void) const {return m_PrintMdTag;}
1256 
1257     /// Get format choice for unaligned reads
GetUnalignedOutputFormat(void) const1258     EOutputFormat GetUnalignedOutputFormat(void) const
1259     {return m_UnalignedOutputFormat;}
1260 
1261 private:
1262     bool m_TrimReadIds;
1263     bool m_PrintUnaligned;
1264     bool m_NoDiscordant;
1265     bool m_FwdRev;
1266     bool m_RevFwd;
1267     bool m_FwdOnly;
1268     bool m_RevOnly;
1269     bool m_OnlyStrandSpecific;
1270     bool m_PrintMdTag;
1271     EOutputFormat m_UnalignedOutputFormat;
1272 };
1273 
1274 /// Argument class to collect multi-threaded arguments
1275 class NCBI_BLASTINPUT_EXPORT CMTArgs : public IBlastCmdLineArgs
1276 {
1277 public:
1278 	enum EMTMode {
1279 		eNotSupported = -1,
1280 		eSplitByDB,
1281 	    eSplitByQueries
1282 	};
1283     /// Default Constructor
CMTArgs(size_t default_num_threads=CThreadable::kMinNumThreads,EMTMode mt_mode=eNotSupported)1284     CMTArgs(size_t default_num_threads = CThreadable::kMinNumThreads, EMTMode mt_mode = eNotSupported) :
1285     	m_NumThreads(default_num_threads), m_MTMode(mt_mode)
1286     {
1287 #ifdef NCBI_NO_THREADS
1288         // No threads can be set in NON-MT mode
1289         m_NumThreads = CThreadable::kMinNumThreads;
1290         m_MTMode = eNotSupported;
1291 #endif
1292     }
1293     CMTArgs(const CArgs& cmd_line_args);
1294     /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
1295     virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
1296     /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
1297     virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
1298                                          CBlastOptions& options);
1299 
1300     /// Get the number of threads to spawn
GetNumThreads() const1301     size_t GetNumThreads() const { return m_NumThreads; }
1302 
GetMTMode() const1303     int GetMTMode() const { return m_MTMode; }
1304 
1305 protected:
1306     void x_ExtractAlgorithmOptions(const CArgs& args);
1307     size_t m_NumThreads;        ///< Number of threads to spawn
1308     EMTMode m_MTMode;
1309 };
1310 
1311 /// Argument class to collect remote vs. local execution
1312 class NCBI_BLASTINPUT_EXPORT CRemoteArgs : public IBlastCmdLineArgs
1313 {
1314 public:
1315     /// Default constructor
CRemoteArgs()1316     CRemoteArgs() : m_IsRemote(false) {}
1317     /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
1318     virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
1319     /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
1320     virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
1321                                          CBlastOptions& options);
1322 
1323     /// Return whether the search should be executed remotely or not
ExecuteRemotely() const1324     bool ExecuteRemotely() const { return m_IsRemote; }
1325 
1326 private:
1327     /// Should the search be executed remotely?
1328     bool m_IsRemote;
1329 };
1330 
1331 /// Argument class to collect debugging options.
1332 /// Only show in command line if compiled with _BLAST_DEBUG
1333 class NCBI_BLASTINPUT_EXPORT CDebugArgs : public IBlastCmdLineArgs
1334 {
1335 public:
1336     /// Default constructor
CDebugArgs()1337     CDebugArgs() : m_DebugOutput(false), m_RmtDebugOutput(false) {}
1338     /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
1339     virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
1340     /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
1341     virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
1342                                          CBlastOptions& options);
1343 
1344     /// Return whether debug (verbose) output should be produced on remote
1345     /// searches (only available when compiled with _DEBUG)
ProduceDebugRemoteOutput() const1346     bool ProduceDebugRemoteOutput() const { return m_RmtDebugOutput; }
1347     /// Return whether debug (verbose) output should be produced
1348     /// (only available when compiled with _DEBUG)
ProduceDebugOutput() const1349     bool ProduceDebugOutput() const { return m_DebugOutput; }
1350 private:
1351 
1352     /// Should debugging (verbose) output be printed
1353     bool m_DebugOutput;
1354     /// Should debugging (verbose) output be printed for remote BLAST
1355     bool m_RmtDebugOutput;
1356 };
1357 
1358 /// Argument class to retrieve options for filtering HSPs (e.g.: culling
1359 /// options, best hit algorithm options)
1360 class NCBI_BLASTINPUT_EXPORT CHspFilteringArgs : public IBlastCmdLineArgs
1361 {
1362 public:
1363     /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
1364     virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
1365     /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
1366     virtual void ExtractAlgorithmOptions(const CArgs& args,
1367                                          CBlastOptions& opts);
1368 };
1369 
1370 /// Argument class to retrieve megablast database indexing options
1371 class NCBI_BLASTINPUT_EXPORT CMbIndexArgs : public IBlastCmdLineArgs
1372 {
1373 public:
1374     /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
1375     virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
1376     /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
1377     virtual void ExtractAlgorithmOptions(const CArgs& args,
1378                                          CBlastOptions& opts);
1379 
1380     /// Auxiliary function to determine if the megablast database indexing
1381     /// options have been set
1382     static bool HasBeenSet(const CArgs& args);
1383 };
1384 
1385 /// Type definition of a container of IBlastCmdLineArgs
1386 typedef vector< CRef<IBlastCmdLineArgs> > TBlastCmdLineArgs;
1387 
1388 
1389 /// Base command line argument class for a generic BLAST command line binary
1390 class NCBI_BLASTINPUT_EXPORT CBlastAppArgs : public CObject
1391 {
1392 public:
1393     /// Default constructor
1394     CBlastAppArgs();
1395     /// Our virtual destructor
~CBlastAppArgs()1396     virtual ~CBlastAppArgs() {}
1397 
1398     /// Set the command line arguments
1399     CArgDescriptions* SetCommandLine();
1400 
1401     /// Get the task for this object
GetTask() const1402     string  GetTask() const {
1403         return m_Task;
1404     }
1405 
1406     /// Set the task for this object
1407     /// @param task task name to set [in]
1408     void SetTask(const string& task);
1409 
1410     /// Extract the command line arguments into a CBlastOptionsHandle object
1411     /// @param args Commad line arguments [in]
1412     CRef<CBlastOptionsHandle> SetOptions(const CArgs& args);
1413 
1414     /// Combine the command line arguments into a CBlastOptions object
1415     /// recovered from saved search strategy
1416     /// @param args Commad line arguments [in]
1417     CRef<CBlastOptionsHandle> SetOptionsForSavedStrategy(const CArgs& args);
1418 
1419     /// Setter for the BLAST options handle, this is used if the options are
1420     /// recovered from a saved BLAST search strategy
SetOptionsHandle(CRef<CBlastOptionsHandle> opts_hndl)1421     void SetOptionsHandle(CRef<CBlastOptionsHandle> opts_hndl) {
1422         m_OptsHandle = opts_hndl;
1423     }
1424 
1425     /// Get the BLAST database arguments
GetBlastDatabaseArgs() const1426     CRef<CBlastDatabaseArgs> GetBlastDatabaseArgs() const {
1427         return m_BlastDbArgs;
1428     }
1429     /// Set the BLAST database arguments
SetBlastDatabaseArgs(CRef<CBlastDatabaseArgs> args)1430     void SetBlastDatabaseArgs(CRef<CBlastDatabaseArgs> args) {
1431         m_BlastDbArgs = args;
1432     }
1433 
1434     /// Get the options for the query sequence(s)
GetQueryOptionsArgs() const1435     CRef<CQueryOptionsArgs> GetQueryOptionsArgs() const {
1436         return m_QueryOptsArgs;
1437     }
1438 
1439     /// Get the formatting options
GetFormattingArgs() const1440     CRef<CFormattingArgs> GetFormattingArgs() const {
1441         return m_FormattingArgs;
1442     }
1443 
1444     /// Get the number of threads to spawn
GetNumThreads() const1445     size_t GetNumThreads() const {
1446         return m_MTArgs->GetNumThreads();
1447     }
1448 
1449     /// Get the input stream
1450     virtual CNcbiIstream& GetInputStream();
1451 
1452     /// Get the output stream
1453     virtual CNcbiOstream& GetOutputStream();
1454 
1455     /// Set the input stream to a temporary input file (needed when importing
1456     /// a search strategy)
1457     /// @param input_file temporary input file to read [in]
SetInputStream(CRef<CTmpFile> input_file)1458     void SetInputStream(CRef<CTmpFile> input_file) {
1459         m_StdCmdLineArgs->SetInputStream(input_file);
1460     }
1461 
1462     /// Get the input stream for the search strategy
GetImportSearchStrategyStream(const CArgs & args)1463     CNcbiIstream* GetImportSearchStrategyStream(const CArgs& args) {
1464         return m_SearchStrategyArgs->GetImportStream(args);
1465     }
1466     /// Get the output stream for the search strategy
GetExportSearchStrategyStream(const CArgs & args)1467     CNcbiOstream* GetExportSearchStrategyStream(const CArgs& args) {
1468         return m_SearchStrategyArgs->GetExportStream(args);
1469     }
1470 
1471     /// Determine whether the search should be executed remotely or not
ExecuteRemotely() const1472     bool ExecuteRemotely() const {
1473         return m_RemoteArgs->ExecuteRemotely();
1474     }
1475 
1476     /// Return whether debug (verbose) output should be produced on remote
1477     /// searches (only available when compiled with _DEBUG)
ProduceDebugRemoteOutput() const1478     bool ProduceDebugRemoteOutput() const {
1479         return m_DebugArgs->ProduceDebugRemoteOutput();
1480     }
1481 
1482     /// Return whether debug (verbose) output should be produced
1483     /// (only available when compiled with _DEBUG)
ProduceDebugOutput() const1484     bool ProduceDebugOutput() const {
1485         return m_DebugArgs->ProduceDebugOutput();
1486     }
1487 
1488     /// Get the query batch size
1489     virtual int GetQueryBatchSize() const = 0;
1490 
1491     /// Retrieve the client ID for remote requests
GetClientId() const1492     string GetClientId() const {
1493         _ASSERT( !m_ClientId.empty() );
1494         return m_ClientId;
1495     }
1496 
1497 protected:
1498     /// Set of command line argument objects
1499     TBlastCmdLineArgs m_Args;
1500     /// query options object
1501     CRef<CQueryOptionsArgs> m_QueryOptsArgs;
1502     /// database/subject object
1503     CRef<CBlastDatabaseArgs> m_BlastDbArgs;
1504     /// formatting options
1505     CRef<CFormattingArgs> m_FormattingArgs;
1506     /// multi-threaded options
1507     CRef<CMTArgs> m_MTArgs;
1508     /// remote vs. local execution options
1509     CRef<CRemoteArgs> m_RemoteArgs;
1510     /// standard command line arguments class
1511     CRef<CStdCmdLineArgs> m_StdCmdLineArgs;
1512     /// arguments for dealing with search strategies
1513     CRef<CSearchStrategyArgs> m_SearchStrategyArgs;
1514     /// Debugging arguments
1515     CRef<CDebugArgs> m_DebugArgs;
1516     /// HSP filtering arguments
1517     CRef<CHspFilteringArgs> m_HspFilteringArgs;
1518     /// The BLAST options handle, only non-NULL if assigned via
1519     /// SetOptionsHandle, i.e.: from a saved search strategy
1520     CRef<CBlastOptionsHandle> m_OptsHandle;
1521     /// Task specified in the command line
1522     string m_Task;
1523     /// Client ID used for remote BLAST submissions, must be populated by
1524     /// subclasses
1525     string m_ClientId;
1526     /// Is this application being run ungapped
1527     bool m_IsUngapped;
1528 
1529     /// Create the options handle based on the command line arguments
1530     /// @param locality whether the search will be executed locally or remotely
1531     /// [in]
1532     /// @param args command line arguments [in]
1533     virtual CRef<CBlastOptionsHandle>
1534     x_CreateOptionsHandle(CBlastOptions::EAPILocality locality,
1535                           const CArgs& args) = 0;
1536 
1537     /** Creates the BLAST options handle based on the task argument
1538      * @param locality whether the search will be executed locally or remotely [in]
1539      * @param task program-specific BLAST named parameter set [in]
1540      */
1541     CRef<CBlastOptionsHandle>
1542     x_CreateOptionsHandleWithTask(CBlastOptions::EAPILocality locality,
1543                                   const string& task);
1544 
1545     /// Issue warnings when recovering from a search strategy (command line
1546     /// applications only)
1547     void x_IssueWarningsForIgnoredOptions(const CArgs& args);
1548 };
1549 
1550 /**
1551  * @brief Create a CArgDescriptions object and invoke SetArgumentDescriptions
1552  * for each of the TBlastCmdLineArgs in its argument list
1553  *
1554  * @param args arguments to configure the return value [in]
1555  *
1556  * @return a CArgDescriptions object with the command line options set
1557  */
1558 NCBI_BLASTINPUT_EXPORT
1559 CArgDescriptions*
1560 SetUpCommandLineArguments(TBlastCmdLineArgs& args);
1561 
1562 END_SCOPE(blast)
1563 END_NCBI_SCOPE
1564 
1565 #endif  /* ALGO_BLAST_BLASTINPUT___BLAST_ARGS__HPP */
1566