1 /* $Id: blast_args.cpp 637471 2021-09-13 18:26:17Z ivanov $
2 * ===========================================================================
3 *
4 *                            PUBLIC DOMAIN NOTICE
5 *               National Center for Biotechnology Information
6 *
7 *  This software/database is a "United States Government Work" under the
8 *  terms of the United States Copyright Act.  It was written as part of
9 *  the author's offical duties as a United States Government employee and
10 *  thus cannot be copyrighted.  This software/database is freely available
11 *  to the public for use. The National Library of Medicine and the U.S.
12 *  Government have not placed any restriction on its use or reproduction.
13 *
14 *  Although all reasonable efforts have been taken to ensure the accuracy
15 *  and reliability of the software and data, the NLM and the U.S.
16 *  Government do not and cannot warrant the performance or results that
17 *  may be obtained by using this software or data. The NLM and the U.S.
18 *  Government disclaim all warranties, express or implied, including
19 *  warranties of performance, merchantability or fitness for any particular
20 *  purpose.
21 *
22 *  Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================*/
25 
26 /*****************************************************************************
27 
28 File name: blast_args.cpp
29 
30 Author: Jason Papadopoulos
31 
32 ******************************************************************************/
33 
34 /** @file blast_args.cpp
35  * convert blast-related command line
36  * arguments into blast options
37 */
38 #include <ncbi_pch.hpp>
39 #include <corelib/ncbi_system.hpp>
40 #include <algo/blast/api/version.hpp>
41 #include <algo/blast/blastinput/blast_args.hpp>
42 #include <algo/blast/api/blast_exception.hpp>
43 #include <algo/blast/api/blast_aux.hpp>
44 #include <algo/blast/api/objmgr_query_data.hpp> /* for CObjMgrQueryFactory */
45 #include <algo/blast/core/blast_nalookup.h>
46 #include <algo/blast/core/hspfilter_besthit.h>
47 #include <objects/scoremat/PssmWithParameters.hpp>
48 #include <util/format_guess.hpp>
49 #include <util/line_reader.hpp>
50 #include <objtools/blast/seqdb_reader/seqdb.hpp>
51 #include <algo/blast/blastinput/blast_input.hpp>    // for CInputException
52 #include <algo/winmask/seq_masker_istat_factory.hpp>    // for CSeqMaskerIstatFactory::DiscoverStatType
53 #include <connect/ncbi_connutil.h>
54 #include <objtools/align_format/align_format_util.hpp>
55 
56 #include <algo/blast/api/msa_pssm_input.hpp>    // for CPsiBlastInputClustalW
57 #include <algo/blast/api/pssm_engine.hpp>       // for CPssmEngine
58 
59 BEGIN_NCBI_SCOPE
60 BEGIN_SCOPE(blast)
61 USING_SCOPE(objects);
62 USING_SCOPE(align_format);
63 
64 void
ExtractAlgorithmOptions(const CArgs &,CBlastOptions &)65 IBlastCmdLineArgs::ExtractAlgorithmOptions(const CArgs& /* cmd_line_args */,
66                                            CBlastOptions& /* options */)
67 {}
68 
CProgramDescriptionArgs(const string & program_name,const string & program_desc)69 CProgramDescriptionArgs::CProgramDescriptionArgs(const string& program_name,
70                                                  const string& program_desc)
71     : m_ProgName(program_name), m_ProgDesc(program_desc)
72 {}
73 
74 void
SetArgumentDescriptions(CArgDescriptions & arg_desc)75 CProgramDescriptionArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
76 {
77     // program description
78     arg_desc.SetUsageContext(m_ProgName, m_ProgDesc + " " +
79                              CBlastVersion().Print());
80 }
81 
CTaskCmdLineArgs(const set<string> & supported_tasks,const string & default_task)82 CTaskCmdLineArgs::CTaskCmdLineArgs(const set<string>& supported_tasks,
83                                    const string& default_task)
84 : m_SupportedTasks(supported_tasks), m_DefaultTask(default_task)
85 {
86     _ASSERT( !m_SupportedTasks.empty() );
87     if ( !m_DefaultTask.empty() ) {
88         _ASSERT(m_SupportedTasks.find(m_DefaultTask) != m_SupportedTasks.end());
89     }
90 }
91 
92 void
SetArgumentDescriptions(CArgDescriptions & arg_desc)93 CTaskCmdLineArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
94 {
95     arg_desc.SetCurrentGroup("General search options");
96     if ( !m_DefaultTask.empty() ) {
97         arg_desc.AddDefaultKey(kTask, "task_name", "Task to execute",
98                                CArgDescriptions::eString, m_DefaultTask);
99     } else {
100         arg_desc.AddKey(kTask, "task_name", "Task to execute",
101                         CArgDescriptions::eString);
102     }
103     arg_desc.SetConstraint(kTask, new CArgAllowStringSet(m_SupportedTasks));
104     arg_desc.SetCurrentGroup("");
105 
106 }
107 
108 void
ExtractAlgorithmOptions(const CArgs &,CBlastOptions &)109 CTaskCmdLineArgs::ExtractAlgorithmOptions(const CArgs& /* cmd_line_args */,
110                                           CBlastOptions& /* options */)
111 {
112     // N.B.: handling of tasks occurs at the application level to ensure that
113     // only relevant tasks are added (@sa CBlastnAppArgs)
114 }
115 
116 void
SetArgumentDescriptions(CArgDescriptions & arg_desc)117 CGenericSearchArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
118 {
119     arg_desc.SetCurrentGroup("General search options");
120 
121     // evalue cutoff
122     if (!m_IsIgBlast) {
123         arg_desc.AddDefaultKey(kArgEvalue, "evalue",
124                      "Expectation value (E) threshold for saving hits ",
125                      CArgDescriptions::eDouble,
126                      NStr::DoubleToString(BLAST_EXPECT_VALUE));
127     } else if (m_QueryIsProtein) {
128         arg_desc.AddDefaultKey(kArgEvalue, "evalue",
129                      "Expectation value (E) threshold for saving hits ",
130                      CArgDescriptions::eDouble,
131                      NStr::DoubleToString(1.0));
132     } else {
133         //igblastn
134         arg_desc.AddDefaultKey(kArgEvalue, "evalue",
135                      "Expectation value (E) threshold for saving hits ",
136                      CArgDescriptions::eDouble,
137                      NStr::DoubleToString(20.0));
138     }
139 
140     // word size
141     // Default values: blastn=11, megablast=28, others=3
142     if(!m_IsRpsBlast) {
143         const string description = m_QueryIsProtein
144             ? "Word size for wordfinder algorithm"
145             : "Word size for wordfinder algorithm (length of best perfect match)";
146         arg_desc.AddOptionalKey(kArgWordSize, "int_value", description,
147                                 CArgDescriptions::eInteger);
148         arg_desc.SetConstraint(kArgWordSize, m_QueryIsProtein
149                                ? new CArgAllowValuesGreaterThanOrEqual(2)
150                                : new CArgAllowValuesGreaterThanOrEqual(4));
151     }
152 
153     if ( !m_IsRpsBlast && !m_IsTblastx) {
154         // gap open penalty
155         arg_desc.AddOptionalKey(kArgGapOpen, "open_penalty",
156                                 "Cost to open a gap",
157                                 CArgDescriptions::eInteger);
158 
159         // gap extend penalty
160         arg_desc.AddOptionalKey(kArgGapExtend, "extend_penalty",
161                                "Cost to extend a gap",
162                                CArgDescriptions::eInteger);
163     }
164 
165 
166     if (m_ShowPercentIdentity && !m_IsIgBlast) {
167         arg_desc.SetCurrentGroup("Restrict search or results");
168         arg_desc.AddOptionalKey(kArgPercentIdentity, "float_value",
169                                 "Percent identity",
170                                 CArgDescriptions::eDouble);
171         arg_desc.SetConstraint(kArgPercentIdentity,
172                                new CArgAllow_Doubles(0.0, 100.0));
173     }
174 
175     if (!m_IsIgBlast) {
176         arg_desc.SetCurrentGroup("Restrict search or results");
177         arg_desc.AddOptionalKey(kArgQueryCovHspPerc, "float_value",
178                             "Percent query coverage per hsp",
179                             CArgDescriptions::eDouble);
180         arg_desc.SetConstraint(kArgQueryCovHspPerc,
181                            new CArgAllow_Doubles(0.0, 100.0));
182 
183         arg_desc.AddOptionalKey(kArgMaxHSPsPerSubject, "int_value",
184                            "Set maximum number of HSPs per subject sequence to save for each query",
185                            CArgDescriptions::eInteger);
186         arg_desc.SetConstraint(kArgMaxHSPsPerSubject,
187                            new CArgAllowValuesGreaterThanOrEqual(1));
188 
189         arg_desc.SetCurrentGroup("Extension options");
190         // ungapped X-drop
191         // Default values: blastn=20, megablast=10, others=7
192         arg_desc.AddOptionalKey(kArgUngappedXDropoff, "float_value",
193                             "X-dropoff value (in bits) for ungapped extensions",
194                             CArgDescriptions::eDouble);
195 
196         // Tblastx is ungapped only.
197         if (!m_IsTblastx) {
198          // initial gapped X-drop
199          // Default values: blastn=30, megablast=20, tblastx=0, others=15
200             arg_desc.AddOptionalKey(kArgGappedXDropoff, "float_value",
201                  "X-dropoff value (in bits) for preliminary gapped extensions",
202                  CArgDescriptions::eDouble);
203 
204          // final gapped X-drop
205          // Default values: blastn/megablast=50, tblastx=0, others=25
206             arg_desc.AddOptionalKey(kArgFinalGappedXDropoff, "float_value",
207                          "X-dropoff value (in bits) for final gapped alignment",
208                          CArgDescriptions::eDouble);
209         }
210     }
211     arg_desc.SetCurrentGroup("Statistical options");
212     // effective search space
213     // Default value is the real size
214     arg_desc.AddOptionalKey(kArgEffSearchSpace, "int_value",
215                             "Effective length of the search space",
216                             CArgDescriptions::eInt8);
217     arg_desc.SetConstraint(kArgEffSearchSpace,
218                            new CArgAllowValuesGreaterThanOrEqual(0));
219 
220     if (!m_SuppressSumStats) {
221         arg_desc.AddOptionalKey(kArgSumStats, "bool_value",
222                      	 	"Use sum statistics",
223                      	 	CArgDescriptions::eBoolean);
224     }
225 
226     arg_desc.SetCurrentGroup("");
227 }
228 
229 void
ExtractAlgorithmOptions(const CArgs & args,CBlastOptions & opt)230 CGenericSearchArgs::ExtractAlgorithmOptions(const CArgs& args,
231                                             CBlastOptions& opt)
232 {
233     if (args.Exist(kArgEvalue) && args[kArgEvalue]) {
234         opt.SetEvalueThreshold(args[kArgEvalue].AsDouble());
235     }
236 
237     int gap_open=0, gap_extend=0;
238     if (args.Exist(kArgMatrixName) && args[kArgMatrixName])
239          BLAST_GetProteinGapExistenceExtendParams
240              (args[kArgMatrixName].AsString().c_str(), &gap_open, &gap_extend);
241 
242     if (args.Exist(kArgGapOpen) && args[kArgGapOpen]) {
243         opt.SetGapOpeningCost(args[kArgGapOpen].AsInteger());
244     }
245     else if (args.Exist(kArgMatrixName) && args[kArgMatrixName]) {
246         opt.SetGapOpeningCost(gap_open);
247     }
248 
249     if (args.Exist(kArgGapExtend) && args[kArgGapExtend]) {
250         opt.SetGapExtensionCost(args[kArgGapExtend].AsInteger());
251     }
252     else if (args.Exist(kArgMatrixName) && args[kArgMatrixName]) {
253         opt.SetGapExtensionCost(gap_extend);
254     }
255 
256     if (args.Exist(kArgUngappedXDropoff) && args[kArgUngappedXDropoff]) {
257         opt.SetXDropoff(args[kArgUngappedXDropoff].AsDouble());
258     }
259 
260     if (args.Exist(kArgGappedXDropoff) && args[kArgGappedXDropoff]) {
261         opt.SetGapXDropoff(args[kArgGappedXDropoff].AsDouble());
262     }
263 
264     if (args.Exist(kArgFinalGappedXDropoff) && args[kArgFinalGappedXDropoff]) {
265         opt.SetGapXDropoffFinal(args[kArgFinalGappedXDropoff].AsDouble());
266     }
267 
268     if ( args.Exist(kArgWordSize) && args[kArgWordSize]) {
269         if (m_QueryIsProtein && args[kArgWordSize].AsInteger() > 5)
270            opt.SetLookupTableType(eCompressedAaLookupTable);
271         opt.SetWordSize(args[kArgWordSize].AsInteger());
272     }
273 
274     if (args.Exist(kArgEffSearchSpace) && args[kArgEffSearchSpace]) {
275         CNcbiEnvironment env;
276         env.Set("OLD_FSC", "true");
277         opt.SetEffectiveSearchSpace(args[kArgEffSearchSpace].AsInt8());
278     }
279 
280     if (args.Exist(kArgPercentIdentity) && args[kArgPercentIdentity]) {
281         opt.SetPercentIdentity(args[kArgPercentIdentity].AsDouble());
282     }
283 
284     if (args.Exist(kArgQueryCovHspPerc) && args[kArgQueryCovHspPerc]) {
285         opt.SetQueryCovHspPerc(args[kArgQueryCovHspPerc].AsDouble());
286     }
287 
288     if (args.Exist(kArgMaxHSPsPerSubject) && args[kArgMaxHSPsPerSubject]) {
289         opt.SetMaxHspsPerSubject(args[kArgMaxHSPsPerSubject].AsInteger());
290     }
291 
292     if (args.Exist(kArgSumStats) && args[kArgSumStats]) {
293         opt.SetSumStatisticsMode(args[kArgSumStats].AsBoolean());
294     }
295 }
296 
297 void
SetArgumentDescriptions(CArgDescriptions & arg_desc)298 CFilteringArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
299 {
300     arg_desc.SetCurrentGroup("Query filtering options");
301 
302     if (m_QueryIsProtein) {
303         arg_desc.AddDefaultKey(kArgSegFiltering, "SEG_options",
304                         "Filter query sequence with SEG "
305                         "(Format: '" + kDfltArgApplyFiltering + "', " +
306                         "'window locut hicut', or '" + kDfltArgNoFiltering +
307                         "' to disable)",
308                         CArgDescriptions::eString, m_FilterByDefault
309                         ? kDfltArgSegFiltering : kDfltArgNoFiltering);
310         arg_desc.AddDefaultKey(kArgLookupTableMaskingOnly, "soft_masking",
311                         "Apply filtering locations as soft masks",
312                         CArgDescriptions::eBoolean,
313                         kDfltArgLookupTableMaskingOnlyProt);
314     } else {
315         arg_desc.AddDefaultKey(kArgDustFiltering, "DUST_options",
316                         "Filter query sequence with DUST "
317                         "(Format: '" + kDfltArgApplyFiltering + "', " +
318                         "'level window linker', or '" + kDfltArgNoFiltering +
319                         "' to disable)",
320                         CArgDescriptions::eString, m_FilterByDefault
321                         ? kDfltArgDustFiltering : kDfltArgNoFiltering);
322         arg_desc.AddOptionalKey(kArgFilteringDb, "filtering_database",
323                 "BLAST database containing filtering elements (i.e.: repeats)",
324                 CArgDescriptions::eString);
325 
326         arg_desc.AddOptionalKey(kArgWindowMaskerTaxId, "window_masker_taxid",
327                 "Enable WindowMasker filtering using a Taxonomic ID",
328                 CArgDescriptions::eInteger);
329 
330         arg_desc.AddOptionalKey(kArgWindowMaskerDatabase, "window_masker_db",
331                 "Enable WindowMasker filtering using this repeats database.",
332                 CArgDescriptions::eString);
333 
334         arg_desc.AddDefaultKey(kArgLookupTableMaskingOnly, "soft_masking",
335                         "Apply filtering locations as soft masks",
336                         CArgDescriptions::eBoolean,
337                         kDfltArgLookupTableMaskingOnlyNucl);
338     }
339 
340     arg_desc.SetCurrentGroup("");
341 }
342 
343 void
x_TokenizeFilteringArgs(const string & filtering_args,vector<string> & output) const344 CFilteringArgs::x_TokenizeFilteringArgs(const string& filtering_args,
345                                         vector<string>& output) const
346 {
347     output.clear();
348     NStr::Split(filtering_args, " ", output);
349     if (output.size() != 3) {
350         NCBI_THROW(CInputException, eInvalidInput,
351                    "Invalid number of arguments to filtering option");
352     }
353 }
354 
355 void
ExtractAlgorithmOptions(const CArgs & args,CBlastOptions & opt)356 CFilteringArgs::ExtractAlgorithmOptions(const CArgs& args, CBlastOptions& opt)
357 {
358     if (args[kArgLookupTableMaskingOnly]) {
359         opt.SetMaskAtHash(args[kArgLookupTableMaskingOnly].AsBoolean());
360     }
361 
362     vector<string> tokens;
363 
364     try {
365         if (m_QueryIsProtein && args[kArgSegFiltering]) {
366             const string& seg_opts = args[kArgSegFiltering].AsString();
367             if (seg_opts == kDfltArgNoFiltering) {
368                 opt.SetSegFiltering(false);
369             } else if (seg_opts == kDfltArgApplyFiltering) {
370                 opt.SetSegFiltering(true);
371             } else {
372                 x_TokenizeFilteringArgs(seg_opts, tokens);
373                 opt.SetSegFilteringWindow(NStr::StringToInt(tokens[0]));
374                 opt.SetSegFilteringLocut(NStr::StringToDouble(tokens[1]));
375                 opt.SetSegFilteringHicut(NStr::StringToDouble(tokens[2]));
376             }
377         }
378 
379         if ( !m_QueryIsProtein && args[kArgDustFiltering]) {
380             const string& dust_opts = args[kArgDustFiltering].AsString();
381             if (dust_opts == kDfltArgNoFiltering) {
382                 opt.SetDustFiltering(false);
383             } else if (dust_opts == kDfltArgApplyFiltering) {
384                 opt.SetDustFiltering(true);
385             } else {
386                 x_TokenizeFilteringArgs(dust_opts, tokens);
387                 opt.SetDustFilteringLevel(NStr::StringToInt(tokens[0]));
388                 opt.SetDustFilteringWindow(NStr::StringToInt(tokens[1]));
389                 opt.SetDustFilteringLinker(NStr::StringToInt(tokens[2]));
390             }
391         }
392     } catch (const CStringException& e) {
393         if (e.GetErrCode() == CStringException::eConvert) {
394             NCBI_THROW(CInputException, eInvalidInput,
395                        "Invalid input for filtering parameters");
396         }
397     }
398 
399     int filter_dbs = 0;
400 
401     if (args.Exist(kArgFilteringDb) && args[kArgFilteringDb]) {
402         opt.SetRepeatFilteringDB(args[kArgFilteringDb].AsString().c_str());
403         filter_dbs++;
404     }
405 
406     if (args.Exist(kArgWindowMaskerTaxId) &&
407         args[kArgWindowMaskerTaxId]) {
408 
409         opt.SetWindowMaskerTaxId
410             (args[kArgWindowMaskerTaxId].AsInteger());
411 
412         filter_dbs++;
413     }
414 
415     if (args.Exist(kArgWindowMaskerDatabase) &&
416         args[kArgWindowMaskerDatabase]) {
417         const string& stat_file = args[kArgWindowMaskerDatabase].AsString();
418         const CSeqMaskerIstatFactory::EStatType type =
419             CSeqMaskerIstatFactory::DiscoverStatType(stat_file);
420         if (type != CSeqMaskerIstatFactory::eOBinary &&
421             type != CSeqMaskerIstatFactory::eBinary) {
422             string msg("Only optimized binary windowmasker stat files are supported");
423             NCBI_THROW(CInputException, eInvalidInput, msg);
424         }
425 
426         opt.SetWindowMaskerDatabase(stat_file.c_str());
427         filter_dbs++;
428     }
429 
430     if (filter_dbs > 1) {
431         string msg =
432             string("Please specify at most one of ") + kArgFilteringDb + ", " +
433             kArgWindowMaskerTaxId + ", or " + kArgWindowMaskerDatabase + ".";
434 
435         NCBI_THROW(CInputException, eInvalidInput, msg);
436     }
437 }
438 
439 void
SetArgumentDescriptions(CArgDescriptions & arg_desc)440 CWindowSizeArg::SetArgumentDescriptions(CArgDescriptions& arg_desc)
441 {
442     arg_desc.SetCurrentGroup("Extension options");
443     // 2-hit wordfinder window size
444     arg_desc.AddOptionalKey(kArgWindowSize, "int_value",
445                             "Multiple hits window size, use 0 to specify "
446                             "1-hit algorithm",
447                             CArgDescriptions::eInteger);
448     arg_desc.SetConstraint(kArgWindowSize,
449                            new CArgAllowValuesGreaterThanOrEqual(0));
450     arg_desc.SetCurrentGroup("");
451 }
452 
453 void
ExtractAlgorithmOptions(const CArgs & args,CBlastOptions & opt)454 CWindowSizeArg::ExtractAlgorithmOptions(const CArgs& args, CBlastOptions& opt)
455 {
456     if (args[kArgWindowSize]) {
457         opt.SetWindowSize(args[kArgWindowSize].AsInteger());
458     } else {
459         int window = -1;
460         BLAST_GetSuggestedWindowSize(opt.GetProgramType(),
461                                      opt.GetMatrixName(),
462                                      &window);
463         if (window != -1) {
464             opt.SetWindowSize(window);
465         }
466     }
467 }
468 
469 void
SetArgumentDescriptions(CArgDescriptions & arg_desc)470 COffDiagonalRangeArg::SetArgumentDescriptions(CArgDescriptions& arg_desc)
471 {
472     arg_desc.SetCurrentGroup("Extension options");
473     // 2-hit wordfinder off diagonal range
474     arg_desc.AddDefaultKey(kArgOffDiagonalRange, "int_value",
475                             "Number of off-diagonals to search for the 2nd hit, "
476                             "use 0 to turn off",
477                             CArgDescriptions::eInteger,
478                             NStr::IntToString(kDfltOffDiagonalRange));
479     arg_desc.SetConstraint(kArgOffDiagonalRange,
480                            new CArgAllowValuesGreaterThanOrEqual(0));
481     arg_desc.SetCurrentGroup("");
482 }
483 
484 void
ExtractAlgorithmOptions(const CArgs & args,CBlastOptions & opt)485 COffDiagonalRangeArg::ExtractAlgorithmOptions(const CArgs& args, CBlastOptions& opt)
486 {
487     if (args[kArgOffDiagonalRange]) {
488         opt.SetOffDiagonalRange(args[kArgOffDiagonalRange].AsInteger());
489     } else {
490         opt.SetOffDiagonalRange(0);
491     }
492 }
493 
494 // Options specific to rmblastn -RMH-
495 void
SetArgumentDescriptions(CArgDescriptions & arg_desc)496 CRMBlastNArg::SetArgumentDescriptions(CArgDescriptions& arg_desc)
497 {
498     arg_desc.SetCurrentGroup("General search options");
499 
500     arg_desc.AddDefaultKey(kArgMatrixName, "matrix_name",
501                            "Scoring matrix name",
502                            CArgDescriptions::eString,
503                            string(""));
504 
505     arg_desc.AddFlag(kArgComplexityAdj,
506                      "Use complexity adjusted scoring",
507                      true);
508 
509 
510     arg_desc.AddDefaultKey(kArgMaskLevel, "int_value",
511                             "Masklevel - percentage overlap allowed per "
512                             "query domain [0-101]",
513                             CArgDescriptions::eInteger,
514                             kDfltArgMaskLevel);
515     arg_desc.SetConstraint(kArgMaskLevel,
516                            new CArgAllowValuesLessThanOrEqual(101));
517 
518     arg_desc.SetCurrentGroup("");
519 }
520 
521 // Options specific to rmblastn -RMH-
522 void
ExtractAlgorithmOptions(const CArgs & args,CBlastOptions & opt)523 CRMBlastNArg::ExtractAlgorithmOptions(const CArgs& args, CBlastOptions& opt)
524 {
525     if (args[kArgMatrixName]) {
526         opt.SetMatrixName(args[kArgMatrixName].AsString().c_str());
527     }
528 
529     opt.SetComplexityAdjMode( args[kArgComplexityAdj] );
530 
531     if (args[kArgMaskLevel]) {
532         opt.SetMaskLevel(args[kArgMaskLevel].AsInteger());
533     }
534 
535     if (args[kArgMinRawGappedScore]) {
536         opt.SetCutoffScore(args[kArgMinRawGappedScore].AsInteger());
537     }else if (args[kArgUngappedXDropoff]) {
538         opt.SetCutoffScore(args[kArgUngappedXDropoff].AsInteger());
539     }
540 }
541 
542 void
SetArgumentDescriptions(CArgDescriptions & arg_desc)543 CWordThresholdArg::SetArgumentDescriptions(CArgDescriptions& arg_desc)
544 {
545     arg_desc.SetCurrentGroup("General search options");
546     // lookup table word score threshold
547     arg_desc.AddOptionalKey(kArgWordScoreThreshold, "float_value",
548                  "Minimum word score such that the word is added to the "
549                  "BLAST lookup table",
550                  CArgDescriptions::eDouble);
551     arg_desc.SetConstraint(kArgWordScoreThreshold,
552                            new CArgAllowValuesGreaterThanOrEqual(0));
553     arg_desc.SetCurrentGroup("");
554 }
555 
556 static bool
s_IsDefaultWordThreshold(EProgram program,double threshold)557 s_IsDefaultWordThreshold(EProgram program, double threshold)
558 {
559     int word_threshold = static_cast<int>(threshold);
560     bool retval = true;
561     if (program == eBlastp &&
562         word_threshold != BLAST_WORD_THRESHOLD_BLASTP) {
563         retval = false;
564     } else if (program == eBlastx &&
565         word_threshold != BLAST_WORD_THRESHOLD_BLASTX) {
566         retval = false;
567     } else if (program == eTblastn &&
568                word_threshold != BLAST_WORD_THRESHOLD_TBLASTN) {
569         retval = false;
570     }
571     return retval;
572 }
573 
574 void
ExtractAlgorithmOptions(const CArgs & args,CBlastOptions & opt)575 CWordThresholdArg::ExtractAlgorithmOptions(const CArgs& args,
576                                            CBlastOptions& opt)
577 {
578     if (args[kArgWordScoreThreshold]) {
579         opt.SetWordThreshold(args[kArgWordScoreThreshold].AsDouble());
580     } else if (s_IsDefaultWordThreshold(opt.GetProgram(),
581                                         opt.GetWordThreshold())) {
582         double threshold = -1;
583         BLAST_GetSuggestedThreshold(opt.GetProgramType(),
584                                     opt.GetMatrixName(),
585                                     &threshold);
586         if (threshold != -1) {
587             opt.SetWordThreshold(threshold);
588         }
589     }
590 }
591 
592 void
SetArgumentDescriptions(CArgDescriptions & arg_desc)593 CMatrixNameArg::SetArgumentDescriptions(CArgDescriptions& arg_desc)
594 {
595     arg_desc.SetCurrentGroup("General search options");
596     arg_desc.AddOptionalKey(kArgMatrixName, "matrix_name",
597                            "Scoring matrix name (normally BLOSUM62)",
598                            CArgDescriptions::eString);
599     arg_desc.SetCurrentGroup("");
600 }
601 
602 void
ExtractAlgorithmOptions(const CArgs & args,CBlastOptions & opt)603 CMatrixNameArg::ExtractAlgorithmOptions(const CArgs& args, CBlastOptions& opt)
604 {
605     if (args[kArgMatrixName]) {
606         opt.SetMatrixName(args[kArgMatrixName].AsString().c_str());
607     }
608 }
609 
610 void
SetArgumentDescriptions(CArgDescriptions & arg_desc)611 CNuclArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
612 {
613     // TLM arg_desc.SetCurrentGroup("Nucleotide scoring options");
614 
615     arg_desc.SetCurrentGroup("General search options");
616     // blastn mismatch penalty
617     arg_desc.AddOptionalKey(kArgMismatch, "penalty",
618                            "Penalty for a nucleotide mismatch",
619                            CArgDescriptions::eInteger);
620     arg_desc.SetConstraint(kArgMismatch,
621                            new CArgAllowValuesLessThanOrEqual(0));
622 
623     // blastn match reward
624     arg_desc.AddOptionalKey(kArgMatch, "reward",
625                            "Reward for a nucleotide match",
626                            CArgDescriptions::eInteger);
627     arg_desc.SetConstraint(kArgMatch,
628                            new CArgAllowValuesGreaterThanOrEqual(0));
629 
630 
631     arg_desc.SetCurrentGroup("Extension options");
632     arg_desc.AddFlag(kArgNoGreedyExtension,
633                      "Use non-greedy dynamic programming extension",
634                      true);
635 
636     arg_desc.SetCurrentGroup("");
637 }
638 
639 void
ExtractAlgorithmOptions(const CArgs & cmd_line_args,CBlastOptions & options)640 CNuclArgs::ExtractAlgorithmOptions(const CArgs& cmd_line_args,
641                                    CBlastOptions& options)
642 {
643     if (cmd_line_args.Exist(kArgMismatch) && cmd_line_args[kArgMismatch]) {
644         options.SetMismatchPenalty(cmd_line_args[kArgMismatch].AsInteger());
645     }
646     if (cmd_line_args.Exist(kArgMatch) && cmd_line_args[kArgMatch]) {
647         options.SetMatchReward(cmd_line_args[kArgMatch].AsInteger());
648     }
649 
650     if (cmd_line_args.Exist(kArgNoGreedyExtension) &&
651         cmd_line_args[kArgNoGreedyExtension]) {
652         options.SetGapExtnAlgorithm(eDynProgScoreOnly);
653         options.SetGapTracebackAlgorithm(eDynProgTbck);
654     }
655 }
656 
657 const string CDiscontiguousMegablastArgs::kTemplType_Coding("coding");
658 const string CDiscontiguousMegablastArgs::kTemplType_Optimal("optimal");
659 const string
660 CDiscontiguousMegablastArgs::kTemplType_CodingAndOptimal("coding_and_optimal");
661 
662 void
SetArgumentDescriptions(CArgDescriptions & arg_desc)663 CDiscontiguousMegablastArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
664 {
665     arg_desc.SetCurrentGroup("Extension options");
666     // FIXME: this can be applied to any program, but since it was only offered
667     // in megablast, we're putting it here
668     arg_desc.AddOptionalKey(kArgMinRawGappedScore, "int_value",
669                             "Minimum raw gapped score to keep an alignment "
670                             "in the preliminary gapped and traceback stages",
671                             CArgDescriptions::eInteger);
672 
673     arg_desc.SetCurrentGroup("Discontiguous MegaBLAST options");
674 
675     arg_desc.AddOptionalKey(kArgDMBTemplateType, "type",
676                  "Discontiguous MegaBLAST template type",
677                  CArgDescriptions::eString);
678     arg_desc.SetConstraint(kArgDMBTemplateType, &(*new CArgAllow_Strings,
679                                                   kTemplType_Coding,
680                                                   kTemplType_Optimal,
681                                                   kTemplType_CodingAndOptimal));
682     arg_desc.SetDependency(kArgDMBTemplateType,
683                            CArgDescriptions::eRequires,
684                            kArgDMBTemplateLength);
685 
686     arg_desc.AddOptionalKey(kArgDMBTemplateLength, "int_value",
687                  "Discontiguous MegaBLAST template length",
688                  CArgDescriptions::eInteger);
689     set<int> allowed_values;
690     allowed_values.insert(16);
691     allowed_values.insert(18);
692     allowed_values.insert(21);
693     arg_desc.SetConstraint(kArgDMBTemplateLength,
694                            new CArgAllowIntegerSet(allowed_values));
695     arg_desc.SetDependency(kArgDMBTemplateLength,
696                            CArgDescriptions::eRequires,
697                            kArgDMBTemplateType);
698 
699     arg_desc.SetCurrentGroup("");
700 }
701 
702 void
ExtractAlgorithmOptions(const CArgs & args,CBlastOptions & options)703 CDiscontiguousMegablastArgs::ExtractAlgorithmOptions(const CArgs& args,
704                                                      CBlastOptions& options)
705 {
706     if (args[kArgMinRawGappedScore]) {
707         options.SetCutoffScore(args[kArgMinRawGappedScore].AsInteger());
708     }
709 
710     if (args[kArgDMBTemplateType]) {
711         const string& type = args[kArgDMBTemplateType].AsString();
712         EDiscWordType temp_type = eMBWordCoding;
713 
714         if (type == kTemplType_Coding) {
715             temp_type = eMBWordCoding;
716         } else if (type == kTemplType_Optimal) {
717             temp_type = eMBWordOptimal;
718         } else if (type == kTemplType_CodingAndOptimal) {
719             temp_type = eMBWordTwoTemplates;
720         } else {
721             abort();
722         }
723         options.SetMBTemplateType(static_cast<unsigned char>(temp_type));
724     }
725 
726     if (args[kArgDMBTemplateLength]) {
727         unsigned char tlen =
728             static_cast<unsigned char>(args[kArgDMBTemplateLength].AsInteger());
729         options.SetMBTemplateLength(tlen);
730     }
731 
732     // FIXME: should the window size be adjusted if this is set?
733 }
734 
735 void
SetArgumentDescriptions(CArgDescriptions & arg_desc)736 CCompositionBasedStatsArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
737 {
738     arg_desc.SetCurrentGroup("General search options");
739     // composition based statistics, keep in sync with ECompoAdjustModes
740     // documentation in composition_constants.h
741 
742     string zero_opt = !m_ZeroOptDescr.empty() ?
743         (string)"    0 or F or f: " + m_ZeroOptDescr + "\n" :
744         "    0 or F or f: No composition-based statistics\n";
745 
746     string one_opt_insrt = m_Is2and3Supported ? "" : " or T or t";
747 
748     string more_opts = m_Is2and3Supported ?
749         "    2 or T or t : Composition-based score adjustment as in "
750         "Bioinformatics 21:902-911,\n"
751         "    2005, conditioned on sequence properties\n"
752         "    3: Composition-based score adjustment as in "
753         "Bioinformatics 21:902-911,\n"
754         "    2005, unconditionally\n" : "";
755 
756     string legend = (string)"Use composition-based statistics:\n"
757         "    D or d: default (equivalent to " + m_DefaultOpt + " )\n"
758         + zero_opt
759         + "    1" + one_opt_insrt + ": Composition-based statistics "
760         "as in NAR 29:2994-3005, 2001\n"
761         + more_opts;
762 
763     arg_desc.AddDefaultKey(kArgCompBasedStats, "compo", legend,
764                            CArgDescriptions::eString, m_DefaultOpt);
765 
766 
767     arg_desc.SetCurrentGroup("Miscellaneous options");
768     // Use Smith-Waterman algorithm in traceback stage
769     // FIXME: available only for gapped blastp/tblastn, and with
770     // composition-based statistics
771     arg_desc.AddFlag(kArgUseSWTraceback,
772                      "Compute locally optimal Smith-Waterman alignments?",
773                      true);
774     arg_desc.SetCurrentGroup("");
775 }
776 
777 /**
778  * @brief Auxiliary function to set the composition based statistics and smith
779  * waterman options
780  *
781  * @param opt BLAST options object [in|out]
782  * @param comp_stat_string command line value for composition based statistics
783  * [in]
784  * @param smith_waterman_value command line value for determining the use of
785  * the smith-waterman algorithm [in]
786  * @param ungapped pointer to the value which determines whether the search
787  * should be ungapped or not. It is NULL if ungapped searches are not
788  * applicable
789  * @param is_deltablast is program deltablast [in]
790  */
791 static void
s_SetCompositionBasedStats(CBlastOptions & opt,const string & comp_stat_string,bool smith_waterman_value,bool * ungapped)792 s_SetCompositionBasedStats(CBlastOptions& opt,
793                            const string& comp_stat_string,
794                            bool smith_waterman_value,
795                            bool* ungapped)
796 {
797     const EProgram program = opt.GetProgram();
798     if (program == eBlastp || program == eTblastn ||
799         program == ePSIBlast || program == ePSITblastn ||
800         program == eRPSBlast || program == eRPSTblastn ||
801         program == eBlastx  ||  program == eDeltaBlast) {
802 
803         ECompoAdjustModes compo_mode = eNoCompositionBasedStats;
804 
805         switch (comp_stat_string[0]) {
806             case '0': case 'F': case 'f':
807                 compo_mode = eNoCompositionBasedStats;
808                 break;
809             case '1':
810                 compo_mode = eCompositionBasedStats;
811                 break;
812             case 'D': case 'd':
813                 if ((program == eRPSBlast) || (program == eRPSTblastn)) {
814                     compo_mode = eNoCompositionBasedStats;
815                 }
816                 else if (program == eDeltaBlast) {
817                     compo_mode = eCompositionBasedStats;
818                 }
819                 else {
820                     compo_mode = eCompositionMatrixAdjust;
821                 }
822                 break;
823             case '2':
824                 compo_mode = eCompositionMatrixAdjust;
825                 break;
826             case '3':
827                 compo_mode = eCompoForceFullMatrixAdjust;
828                 break;
829             case 'T': case 't':
830                 compo_mode = (program == eRPSBlast || program == eRPSTblastn || program == eDeltaBlast) ?
831                     eCompositionBasedStats : eCompositionMatrixAdjust;
832                 break;
833         }
834 
835         if(program == ePSITblastn) {
836             compo_mode = eNoCompositionBasedStats;
837         }
838 
839         if (ungapped && *ungapped && compo_mode != eNoCompositionBasedStats) {
840             NCBI_THROW(CInputException, eInvalidInput,
841                        "Composition-adjusted searched are not supported with "
842                        "an ungapped search, please add -comp_based_stats F or "
843                        "do a gapped search");
844         }
845 
846         opt.SetCompositionBasedStats(compo_mode);
847         if (program == eBlastp &&
848             compo_mode != eNoCompositionBasedStats &&
849             tolower(comp_stat_string[1]) == 'u') {
850             opt.SetUnifiedP(1);
851         }
852         opt.SetSmithWatermanMode(smith_waterman_value);
853     }
854 }
855 
856 void
ExtractAlgorithmOptions(const CArgs & args,CBlastOptions & opt)857 CCompositionBasedStatsArgs::ExtractAlgorithmOptions(const CArgs& args,
858                                                     CBlastOptions& opt)
859 {
860     if (args[kArgCompBasedStats]) {
861         unique_ptr<bool> ungapped(args.Exist(kArgUngapped)
862             ? new bool(args[kArgUngapped]) : 0);
863         s_SetCompositionBasedStats(opt,
864                                    args[kArgCompBasedStats].AsString(),
865                                    args[kArgUseSWTraceback],
866                                    ungapped.get());
867     }
868 
869 }
870 
871 void
SetArgumentDescriptions(CArgDescriptions & arg_desc)872 CGappedArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
873 {
874     // perform gapped search
875 #if 0
876     arg_desc.AddOptionalKey(ARG_GAPPED, "gapped",
877                  "Perform gapped alignment (default T, but "
878                  "not available for tblastx)",
879                  CArgDescriptions::eBoolean,
880                  CArgDescriptions::fOptionalSeparator);
881     arg_desc.AddAlias("-gapped", ARG_GAPPED);
882 #endif
883     arg_desc.SetCurrentGroup("Extension options");
884     arg_desc.AddFlag(kArgUngapped, "Perform ungapped alignment only?", true);
885     arg_desc.SetCurrentGroup("");
886 }
887 
888 void
ExtractAlgorithmOptions(const CArgs & args,CBlastOptions & options)889 CGappedArgs::ExtractAlgorithmOptions(const CArgs& args, CBlastOptions& options)
890 {
891 #if 0
892     if (args[ARG_GAPPED] && options.GetProgram() != eTblastx) {
893         options.SetGappedMode(args[ARG_GAPPED].AsBoolean());
894     }
895 #endif
896     options.SetGappedMode( !args[kArgUngapped] );
897 }
898 
899 void
SetArgumentDescriptions(CArgDescriptions & arg_desc)900 CLargestIntronSizeArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
901 {
902     arg_desc.SetCurrentGroup("General search options");
903     // largest intron length
904     arg_desc.AddDefaultKey(kArgMaxIntronLength, "length",
905                     "Length of the largest intron allowed in a translated "
906                     "nucleotide sequence when linking multiple distinct "
907                     "alignments",
908                     CArgDescriptions::eInteger,
909                     NStr::IntToString(kDfltArgMaxIntronLength));
910     arg_desc.SetConstraint(kArgMaxIntronLength,
911                            new CArgAllowValuesGreaterThanOrEqual(0));
912     arg_desc.SetCurrentGroup("");
913 }
914 
915 void
ExtractAlgorithmOptions(const CArgs & args,CBlastOptions & opt)916 CLargestIntronSizeArgs::ExtractAlgorithmOptions(const CArgs& args,
917                                                 CBlastOptions& opt)
918 {
919     if ( !args[kArgMaxIntronLength] ) {
920         return;
921     }
922 
923     // sum statistics are defauled to be on unless a cmdline option is set
924     opt.SetLongestIntronLength(args[kArgMaxIntronLength].AsInteger());
925 
926 }
927 
928 void
SetArgumentDescriptions(CArgDescriptions & arg_desc)929 CFrameShiftArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
930 {
931     arg_desc.SetCurrentGroup("General search options");
932     // applicable in blastx/tblastn, off by default
933     arg_desc.AddOptionalKey(kArgFrameShiftPenalty, "frameshift",
934                             "Frame shift penalty (for use with out-of-frame "
935                             "gapped alignment in blastx or tblastn, default "
936                             "ignored)",
937                             CArgDescriptions::eInteger);
938     arg_desc.SetConstraint(kArgFrameShiftPenalty,
939                            new CArgAllowValuesGreaterThanOrEqual(1));
940     arg_desc.SetDependency(kArgFrameShiftPenalty, CArgDescriptions::eExcludes,kArgUngapped);
941     arg_desc.SetCurrentGroup("");
942 }
943 
944 void
ExtractAlgorithmOptions(const CArgs & args,CBlastOptions & opt)945 CFrameShiftArgs::ExtractAlgorithmOptions(const CArgs& args,
946                                          CBlastOptions& opt)
947 {
948     if (args[kArgFrameShiftPenalty]) {
949         if (args[kArgCompBasedStats]) {
950             string cbs = args[kArgCompBasedStats].AsString();
951 
952             if ((cbs[0] != '0' )&& (cbs[0] != 'F')  &&  (cbs[0] != 'f')) {
953             	NCBI_THROW(CInputException, eInvalidInput,
954                        "Composition-adjusted searches are not supported with "
955                        "Out-Of-Frame option, please add -comp_based_stats F ");
956             }
957         }
958 
959         opt.SetOutOfFrameMode();
960         opt.SetFrameShiftPenalty(args[kArgFrameShiftPenalty].AsInteger());
961     }
962 }
963 
964 /// Auxiliary class to validate the genetic code input
965 class CArgAllowGeneticCodeInteger : public CArgAllow
966 {
967 protected:
968      /// Overloaded method from CArgAllow
Verify(const string & value) const969      virtual bool Verify(const string& value) const {
970          static int gcs[] = {1,2,3,4,5,6,9,10,11,12,13,14,15,16,21,22,23,24,25,26,27,28,29,30,31,33};
971          static const set<int> genetic_codes(gcs, gcs+sizeof(gcs)/sizeof(*gcs));
972          const int val = NStr::StringToInt(value);
973          return (genetic_codes.find(val) != genetic_codes.end());
974      }
975 
976      /// Overloaded method from CArgAllow
GetUsage(void) const977      virtual string GetUsage(void) const {
978          return "values between: 1-6, 9-16, 21-31, 33";
979      }
980 };
981 
982 void
SetArgumentDescriptions(CArgDescriptions & arg_desc)983 CGeneticCodeArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
984 {
985     if (m_Target == eQuery) {
986         arg_desc.SetCurrentGroup("Input query options");
987         // query genetic code
988         arg_desc.AddDefaultKey(kArgQueryGeneticCode, "int_value",
989                                "Genetic code to use to translate query (see https://www.ncbi.nlm.nih.gov/Taxonomy/taxonomyhome.html/index.cgi?chapter=cgencodes for details)\n",
990                                CArgDescriptions::eInteger,
991                                NStr::IntToString(BLAST_GENETIC_CODE));
992         arg_desc.SetConstraint(kArgQueryGeneticCode,
993                                new CArgAllowGeneticCodeInteger());
994     } else {
995         arg_desc.SetCurrentGroup("General search options");
996         // DB genetic code
997         arg_desc.AddDefaultKey(kArgDbGeneticCode, "int_value",
998                                "Genetic code to use to translate "
999                                "database/subjects (see user manual for details)\n",
1000                                CArgDescriptions::eInteger,
1001                                NStr::IntToString(BLAST_GENETIC_CODE));
1002         arg_desc.SetConstraint(kArgDbGeneticCode,
1003                                new CArgAllowGeneticCodeInteger());
1004 
1005     }
1006     arg_desc.SetCurrentGroup("");
1007 }
1008 
1009 void
ExtractAlgorithmOptions(const CArgs & args,CBlastOptions & opt)1010 CGeneticCodeArgs::ExtractAlgorithmOptions(const CArgs& args,
1011                                           CBlastOptions& opt)
1012 {
1013     const EProgram program = opt.GetProgram();
1014 
1015     if (m_Target == eQuery && args[kArgQueryGeneticCode]) {
1016         opt.SetQueryGeneticCode(args[kArgQueryGeneticCode].AsInteger());
1017     }
1018 
1019     if (m_Target == eDatabase && args[kArgDbGeneticCode] &&
1020         (program == eTblastn || program == eTblastx) ) {
1021         opt.SetDbGeneticCode(args[kArgDbGeneticCode].AsInteger());
1022     }
1023 }
1024 
1025 void
SetArgumentDescriptions(CArgDescriptions & arg_desc)1026 CGapTriggerArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
1027 {
1028     arg_desc.SetCurrentGroup("Extension options");
1029 
1030     const double default_value = m_QueryIsProtein
1031         ? BLAST_GAP_TRIGGER_PROT : BLAST_GAP_TRIGGER_NUCL;
1032     arg_desc.AddDefaultKey(kArgGapTrigger, "float_value",
1033                            "Number of bits to trigger gapping",
1034                            CArgDescriptions::eDouble,
1035                            NStr::DoubleToString(default_value));
1036     arg_desc.SetCurrentGroup("");
1037 }
1038 
1039 void
ExtractAlgorithmOptions(const CArgs & args,CBlastOptions & opt)1040 CGapTriggerArgs::ExtractAlgorithmOptions(const CArgs& args,
1041                                          CBlastOptions& opt)
1042 {
1043     if (args[kArgGapTrigger]) {
1044         opt.SetGapTrigger(args[kArgGapTrigger].AsDouble());
1045     }
1046 }
1047 
1048 void
SetArgumentDescriptions(CArgDescriptions & arg_desc)1049 CPssmEngineArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
1050 {
1051     arg_desc.SetCurrentGroup("PSSM engine options");
1052 
1053     // Pseudo count
1054     arg_desc.AddDefaultKey(kArgPSIPseudocount, "pseudocount",
1055                            "Pseudo-count value used when constructing PSSM",
1056                            CArgDescriptions::eInteger,
1057                            NStr::IntToString(PSI_PSEUDO_COUNT_CONST));
1058 
1059     if (m_IsDeltaBlast) {
1060         arg_desc.AddDefaultKey(kArgDomainInclusionEThreshold, "ethresh",
1061                                "E-value inclusion threshold for alignments "
1062                                "with conserved domains",
1063                                CArgDescriptions::eDouble,
1064                                NStr::DoubleToString(DELTA_INCLUSION_ETHRESH));
1065     }
1066 
1067     // Evalue inclusion threshold
1068     arg_desc.AddDefaultKey(kArgPSIInclusionEThreshold, "ethresh",
1069                    "E-value inclusion threshold for pairwise alignments",
1070                    CArgDescriptions::eDouble,
1071                    NStr::DoubleToString(PSI_INCLUSION_ETHRESH));
1072 
1073     arg_desc.SetCurrentGroup("");
1074 }
1075 
1076 void
ExtractAlgorithmOptions(const CArgs & args,CBlastOptions & opt)1077 CPssmEngineArgs::ExtractAlgorithmOptions(const CArgs& args,
1078                                          CBlastOptions& opt)
1079 {
1080     if (args[kArgPSIPseudocount]) {
1081         opt.SetPseudoCount(args[kArgPSIPseudocount].AsInteger());
1082     }
1083 
1084     if (args[kArgPSIInclusionEThreshold]) {
1085         opt.SetInclusionThreshold(args[kArgPSIInclusionEThreshold].AsDouble());
1086     }
1087 
1088     if (args.Exist(kArgDomainInclusionEThreshold)
1089         && args[kArgDomainInclusionEThreshold]) {
1090 
1091         opt.SetDomainInclusionThreshold(
1092                              args[kArgDomainInclusionEThreshold].AsDouble());
1093     }
1094 }
1095 
1096 void
SetArgumentDescriptions(CArgDescriptions & arg_desc)1097 CPsiBlastArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
1098 {
1099 
1100     if (m_DbTarget == eNucleotideDb) {
1101         arg_desc.SetCurrentGroup("PSI-TBLASTN options");
1102 
1103         // PSI-tblastn checkpoint
1104         arg_desc.AddOptionalKey(kArgPSIInputChkPntFile, "psi_chkpt_file",
1105                                 "PSI-TBLASTN checkpoint file",
1106                                 CArgDescriptions::eInputFile);
1107         arg_desc.SetDependency(kArgPSIInputChkPntFile,
1108                                 CArgDescriptions::eExcludes,
1109                                 kArgRemote);
1110     } else {
1111         arg_desc.SetCurrentGroup("PSI-BLAST options");
1112 
1113         // Number of iterations
1114         arg_desc.AddDefaultKey(kArgPSINumIterations, "int_value",
1115                                "Number of iterations to perform (0 means run "
1116                                "until convergence)", CArgDescriptions::eInteger,
1117                                NStr::IntToString(kDfltArgPSINumIterations));
1118         arg_desc.SetConstraint(kArgPSINumIterations,
1119                                new CArgAllowValuesGreaterThanOrEqual(0));
1120         arg_desc.SetDependency(kArgPSINumIterations,
1121                                CArgDescriptions::eExcludes,
1122                                kArgRemote);
1123         // checkpoint file
1124         arg_desc.AddOptionalKey(kArgPSIOutputChkPntFile, "checkpoint_file",
1125 
1126                                 "File name to store checkpoint file",
1127                                 CArgDescriptions::eOutputFile);
1128         // ASCII matrix file
1129         arg_desc.AddOptionalKey(kArgAsciiPssmOutputFile, "ascii_mtx_file",
1130                                 "File name to store ASCII version of PSSM",
1131                                 CArgDescriptions::eOutputFile);
1132 
1133         arg_desc.AddFlag(kArgSaveLastPssm, "Save PSSM after the last database "
1134                          "search");
1135         arg_desc.AddFlag(kArgSaveAllPssms, "Save PSSM after each iteration "
1136                          "(file name is given in -save_pssm or "
1137                          "-save_ascii_pssm options)");
1138 
1139         if (!m_IsDeltaBlast) {
1140             vector<string> msa_exclusions;
1141             msa_exclusions.push_back(kArgPSIInputChkPntFile);
1142             msa_exclusions.push_back(kArgQuery);
1143             msa_exclusions.push_back(kArgQueryLocation);
1144             // pattern and MSA is not supported
1145             msa_exclusions.push_back(kArgPHIPatternFile);
1146             arg_desc.SetCurrentGroup("");
1147             arg_desc.SetCurrentGroup("");
1148 
1149             // MSA restart file
1150             arg_desc.SetCurrentGroup("PSSM engine options");
1151             arg_desc.AddOptionalKey(kArgMSAInputFile, "align_restart",
1152                                     "File name of multiple sequence alignment to "
1153                                     "restart PSI-BLAST",
1154                                     CArgDescriptions::eInputFile);
1155             ITERATE(vector<string>, exclusion, msa_exclusions) {
1156                 arg_desc.SetDependency(kArgMSAInputFile,
1157                                        CArgDescriptions::eExcludes,
1158                                        *exclusion);
1159             }
1160 
1161             arg_desc.AddOptionalKey(kArgMSAMasterIndex, "index",
1162                                     "Ordinal number (1-based index) of the sequence"
1163                                     " to use as a master in the multiple sequence "
1164                                     "alignment. If not provided, the first sequence"
1165                                     " in the multiple sequence alignment will be "
1166                                     "used", CArgDescriptions::eInteger);
1167             arg_desc.SetConstraint(kArgMSAMasterIndex,
1168                                    new CArgAllowValuesGreaterThanOrEqual(1));
1169             ITERATE(vector<string>, exclusion, msa_exclusions) {
1170                 arg_desc.SetDependency(kArgMSAMasterIndex,
1171                                        CArgDescriptions::eExcludes,
1172                                        *exclusion);
1173             }
1174             arg_desc.SetDependency(kArgMSAMasterIndex,
1175                                    CArgDescriptions::eRequires,
1176                                    kArgMSAInputFile);
1177             arg_desc.SetDependency(kArgMSAMasterIndex,
1178                                    CArgDescriptions::eExcludes,
1179                                    kArgIgnoreMsaMaster);
1180 
1181             arg_desc.AddFlag(kArgIgnoreMsaMaster,
1182                              "Ignore the master sequence when creating PSSM", true);
1183 
1184             vector<string> ignore_pssm_master_exclusions;
1185             ignore_pssm_master_exclusions.push_back(kArgMSAMasterIndex);
1186             ignore_pssm_master_exclusions.push_back(kArgPSIInputChkPntFile);
1187             ignore_pssm_master_exclusions.push_back(kArgQuery);
1188             ignore_pssm_master_exclusions.push_back(kArgQueryLocation);
1189             ITERATE(vector<string>, exclusion, msa_exclusions) {
1190                 arg_desc.SetDependency(kArgIgnoreMsaMaster,
1191                                        CArgDescriptions::eExcludes,
1192                                        *exclusion);
1193             }
1194             arg_desc.SetDependency(kArgIgnoreMsaMaster,
1195                                    CArgDescriptions::eRequires,
1196                                    kArgMSAInputFile);
1197 
1198             // PSI-BLAST checkpoint
1199             arg_desc.AddOptionalKey(kArgPSIInputChkPntFile, "psi_chkpt_file",
1200                                     "PSI-BLAST checkpoint file",
1201                                     CArgDescriptions::eInputFile);
1202         }
1203     }
1204 
1205     if (!m_IsDeltaBlast) {
1206         arg_desc.SetDependency(kArgPSIInputChkPntFile,
1207                                CArgDescriptions::eExcludes,
1208                                kArgQuery);
1209         arg_desc.SetDependency(kArgPSIInputChkPntFile,
1210                                CArgDescriptions::eExcludes,
1211                                kArgQueryLocation);
1212     }
1213     arg_desc.SetCurrentGroup("");
1214 }
1215 
1216 CRef<CPssmWithParameters>
x_CreatePssmFromMsa(CNcbiIstream & input_stream,CBlastOptions & opt,bool save_ascii_pssm,unsigned int msa_master_idx,bool ignore_pssm_tmplt_seq)1217 CPsiBlastArgs::x_CreatePssmFromMsa(CNcbiIstream& input_stream,
1218                                    CBlastOptions& opt, bool save_ascii_pssm,
1219                                    unsigned int msa_master_idx,
1220                                    bool ignore_pssm_tmplt_seq)
1221 {
1222     // FIXME get these from CBlastOptions
1223     CPSIBlastOptions psiblast_opts;
1224     PSIBlastOptionsNew(&psiblast_opts);
1225     psiblast_opts->nsg_compatibility_mode = ignore_pssm_tmplt_seq;
1226 
1227     CPSIDiagnosticsRequest diags(PSIDiagnosticsRequestNewEx(save_ascii_pssm));
1228     CPsiBlastInputClustalW pssm_input(input_stream, *psiblast_opts,
1229                                       opt.GetMatrixName(), diags, NULL, 0,
1230                                       opt.GetGapOpeningCost(),
1231                                       opt.GetGapExtensionCost(),
1232                                       msa_master_idx);
1233     CPssmEngine pssm_engine(&pssm_input);
1234     return pssm_engine.Run();
1235 }
1236 
1237 void
ExtractAlgorithmOptions(const CArgs & args,CBlastOptions & opt)1238 CPsiBlastArgs::ExtractAlgorithmOptions(const CArgs& args,
1239                                        CBlastOptions& opt)
1240 {
1241     if (m_DbTarget == eProteinDb) {
1242         if (args[kArgPSINumIterations]) {
1243         	if(m_NumIterations == 1)
1244         		m_NumIterations = args[kArgPSINumIterations].AsInteger();
1245         }
1246 
1247         if (args.Exist(kArgSaveLastPssm) && args[kArgSaveLastPssm] &&
1248             (!args.Exist(kArgPSIOutputChkPntFile) ||
1249              !args[kArgPSIOutputChkPntFile]) &&
1250             (!args.Exist(kArgAsciiPssmOutputFile) ||
1251              !args[kArgAsciiPssmOutputFile])) {
1252 
1253             NCBI_THROW(CInputException, eInvalidInput, kArgSaveLastPssm +
1254                        " option requires " + kArgPSIOutputChkPntFile + " or " +
1255                        kArgAsciiPssmOutputFile);
1256         }
1257 
1258         if (args.Exist(kArgSaveAllPssms) && args[kArgSaveAllPssms] &&
1259             (!args.Exist(kArgPSIOutputChkPntFile) ||
1260              !args[kArgPSIOutputChkPntFile]) &&
1261             (!args.Exist(kArgAsciiPssmOutputFile) ||
1262              !args[kArgAsciiPssmOutputFile])) {
1263 
1264             NCBI_THROW(CInputException, eInvalidInput, kArgSaveAllPssms +
1265                        " option requires " + kArgPSIOutputChkPntFile + " or " +
1266                        kArgAsciiPssmOutputFile);
1267         }
1268 
1269         const bool kSaveAllPssms
1270             = args.Exist(kArgSaveAllPssms) && args[kArgSaveAllPssms];
1271         if (args.Exist(kArgPSIOutputChkPntFile) &&
1272             args[kArgPSIOutputChkPntFile]) {
1273             m_CheckPointOutput.Reset
1274                 (new CAutoOutputFileReset
1275                  (args[kArgPSIOutputChkPntFile].AsString(), kSaveAllPssms));
1276         }
1277         const bool kSaveAsciiPssm = args[kArgAsciiPssmOutputFile];
1278         if (kSaveAsciiPssm) {
1279             m_AsciiMatrixOutput.Reset
1280                 (new CAutoOutputFileReset
1281                  (args[kArgAsciiPssmOutputFile].AsString(), kSaveAllPssms));
1282         }
1283         if (args.Exist(kArgMSAInputFile) && args[kArgMSAInputFile]) {
1284             CNcbiIstream& in = args[kArgMSAInputFile].AsInputFile();
1285             unsigned int msa_master_idx = 0;
1286             if (args[kArgMSAMasterIndex]) {
1287                 msa_master_idx = args[kArgMSAMasterIndex].AsInteger() - 1;
1288             }
1289             m_Pssm = x_CreatePssmFromMsa(in, opt, kSaveAsciiPssm,
1290                                          msa_master_idx,
1291                                          args[kArgIgnoreMsaMaster]);
1292         }
1293         if (!m_IsDeltaBlast) {
1294             opt.SetIgnoreMsaMaster(args[kArgIgnoreMsaMaster]);
1295         }
1296 
1297         if (args.Exist(kArgSaveLastPssm) && args[kArgSaveLastPssm]) {
1298             m_SaveLastPssm = true;
1299         }
1300     }
1301 
1302     if (args.Exist(kArgPSIInputChkPntFile) && args[kArgPSIInputChkPntFile]) {
1303         CNcbiIstream& in = args[kArgPSIInputChkPntFile].AsInputFile();
1304         _ASSERT(m_Pssm.Empty());
1305         m_Pssm.Reset(new CPssmWithParameters);
1306         try {
1307             switch (CFormatGuess().Format(in)) {
1308             case CFormatGuess::eBinaryASN:
1309                 in >> MSerial_AsnBinary >> *m_Pssm;
1310                 break;
1311             case CFormatGuess::eTextASN:
1312                 in >> MSerial_AsnText >> *m_Pssm;
1313                 break;
1314             case CFormatGuess::eXml:
1315                 in >> MSerial_Xml >> *m_Pssm;
1316                 break;
1317             default:
1318                 NCBI_THROW(CInputException, eInvalidInput,
1319                            "Unsupported format for PSSM");
1320             }
1321         } catch (const CSerialException&) {
1322             string msg("Unrecognized format for PSSM in ");
1323             msg += args[kArgPSIInputChkPntFile].AsString() + " (must be ";
1324             msg += "PssmWithParameters)";
1325             NCBI_THROW(CInputException, eInvalidInput, msg);
1326         }
1327         _ASSERT(m_Pssm.NotEmpty());
1328     }
1329 }
1330 
1331 void
SetArgumentDescriptions(CArgDescriptions & arg_desc)1332 CPhiBlastArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
1333 {
1334     arg_desc.SetCurrentGroup("PHI-BLAST options");
1335 
1336     arg_desc.AddOptionalKey(kArgPHIPatternFile, "file",
1337                             "File name containing pattern to search",
1338                             CArgDescriptions::eInputFile);
1339     arg_desc.SetDependency(kArgPHIPatternFile,
1340                            CArgDescriptions::eExcludes,
1341                            kArgPSIInputChkPntFile);
1342 
1343     arg_desc.SetCurrentGroup("");
1344 }
1345 
1346 void
ExtractAlgorithmOptions(const CArgs & args,CBlastOptions & opt)1347 CPhiBlastArgs::ExtractAlgorithmOptions(const CArgs& args,
1348                                        CBlastOptions& opt)
1349 {
1350     if (args.Exist(kArgPHIPatternFile) && args[kArgPHIPatternFile]) {
1351         CNcbiIstream& in = args[kArgPHIPatternFile].AsInputFile();
1352         in.clear();
1353         in.seekg(0);
1354         char buffer[4096];
1355         string line;
1356         string pattern;
1357         string name;
1358         while (in.getline(buffer, 4096)) {
1359            line = buffer;
1360            string ltype = line.substr(0, 2);
1361            if (ltype == "ID")
1362              name = line.substr(4);
1363            else if (ltype == "PA")
1364              pattern = line.substr(4);
1365         }
1366         if (!pattern.empty())
1367             opt.SetPHIPattern(pattern.c_str(),
1368                (Blast_QueryIsNucleotide(opt.GetProgramType())
1369                ? true : false));
1370         else
1371             NCBI_THROW(CInputException, eInvalidInput,
1372                        "PHI pattern not read");
1373     }
1374 }
1375 
1376 void
SetArgumentDescriptions(CArgDescriptions & arg_desc)1377 CKBlastpArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
1378 {
1379    arg_desc.SetCurrentGroup("KBLASTP options");
1380    arg_desc.AddDefaultKey(kArgJDistance, "threshold", "Jaccard Distance",
1381 			CArgDescriptions::eDouble, kDfltArgJDistance);
1382    arg_desc.AddDefaultKey(kArgMinHits, "minhits", "minimal number of LSH matches",
1383 			CArgDescriptions::eInteger, kDfltArgMinHits);
1384    arg_desc.AddDefaultKey(kArgCandidateSeqs, "candidates", "Number of candidate sequences to process with BLAST",
1385 			CArgDescriptions::eInteger, kDfltArgCandidateSeqs);
1386 }
1387 
1388 void
ExtractAlgorithmOptions(const CArgs & args,CBlastOptions & opt)1389 CKBlastpArgs::ExtractAlgorithmOptions(const CArgs& args,
1390                                          CBlastOptions& opt)
1391 {
1392 	if (args.Exist(kArgJDistance))
1393 		m_JDistance = args[kArgJDistance].AsDouble();
1394 	if (args.Exist(kArgMinHits))
1395 		m_MinHits = args[kArgMinHits].AsInteger();
1396 	if (args.Exist(kArgCandidateSeqs))
1397 		m_CandidateSeqs = args[kArgCandidateSeqs].AsInteger();
1398 }
1399 
1400 
1401 void
SetArgumentDescriptions(CArgDescriptions & arg_desc)1402 CDeltaBlastArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
1403 {
1404     arg_desc.SetCurrentGroup("DELTA-BLAST options");
1405 
1406     arg_desc.AddDefaultKey(kArgRpsDb, "database_name", "BLAST domain "
1407                            "database name", CArgDescriptions::eString,
1408                            kDfltArgRpsDb);
1409 
1410     arg_desc.AddFlag(kArgShowDomainHits, "Show domain hits");
1411     arg_desc.SetDependency(kArgShowDomainHits, CArgDescriptions::eExcludes,
1412                            kArgRemote);
1413     arg_desc.SetDependency(kArgShowDomainHits, CArgDescriptions::eExcludes,
1414                            kArgSubject);
1415 }
1416 
1417 void
ExtractAlgorithmOptions(const CArgs & args,CBlastOptions & opt)1418 CDeltaBlastArgs::ExtractAlgorithmOptions(const CArgs& args,
1419                                          CBlastOptions& opt)
1420 {
1421     m_DomainDb.Reset(new CSearchDatabase(args[kArgRpsDb].AsString(),
1422                                          CSearchDatabase::eBlastDbIsProtein));
1423 
1424     if (args.Exist(kArgShowDomainHits)) {
1425         m_ShowDomainHits = args[kArgShowDomainHits];
1426     }
1427 }
1428 
1429 void
SetArgumentDescriptions(CArgDescriptions & arg_desc)1430 CMappingArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
1431 {
1432 
1433     arg_desc.SetCurrentGroup("Mapping options");
1434     arg_desc.AddDefaultKey(kArgScore, "num", "Cutoff score for accepting "
1435                            "alignments. Can be expressed as a number or a "
1436                            "function of read length: "
1437                            "L,b,a for a * length + b.\n"
1438                            "Zero means that the cutoff score will be equal to:\n"
1439                            "read length,      if read length <= 20,\n"
1440                            "20,               if read length <= 30,\n"
1441                            "read length - 10, if read length <= 50,\n"
1442                            "40,               otherwise.",
1443                            CArgDescriptions::eString, "0");
1444     arg_desc.AddOptionalKey(kArgMaxEditDist, "num", "Cutoff edit distance for "
1445                             "accepting an alignment\nDefault = unlimited",
1446                             CArgDescriptions::eInteger);
1447     arg_desc.AddDefaultKey(kArgSplice, "TF", "Search for spliced alignments",
1448                            CArgDescriptions::eBoolean, "true");
1449     arg_desc.AddDefaultKey(kArgRefType, "type", "Type of the reference: "
1450                            "genome or transcriptome",
1451                            CArgDescriptions::eString, "genome");
1452     arg_desc.SetConstraint(kArgRefType,
1453                         &(*new CArgAllow_Strings, "genome", "transcriptome"));
1454 
1455     arg_desc.SetCurrentGroup("Query filtering options");
1456     arg_desc.AddDefaultKey(kArgLimitLookup, "TF", "Remove word seeds with "
1457                            "high frequency in the searched database",
1458                            CArgDescriptions::eBoolean, "true");
1459     arg_desc.AddDefaultKey(kArgMaxDbWordCount, "num", "Words that appear more "
1460                            "than this number of times in the database will be"
1461                            " masked in the lookup table",
1462                            CArgDescriptions::eInteger,
1463                            NStr::IntToString(MAX_DB_WORD_COUNT_MAPPER));
1464     arg_desc.AddDefaultKey(kArgLookupStride, "num", "Number of words to skip "
1465 			    "after collecting one while creating a lookup table",
1466                  CArgDescriptions::eInteger, "0");
1467 
1468     arg_desc.SetCurrentGroup("");
1469 }
1470 
1471 
1472 void
ExtractAlgorithmOptions(const CArgs & args,CBlastOptions & opt)1473 CMappingArgs::ExtractAlgorithmOptions(const CArgs& args,
1474                                       CBlastOptions& opt)
1475 {
1476     if (args.Exist(kArgScore) && args[kArgScore]) {
1477 
1478         string s = args[kArgScore].AsString();
1479         // score cutoff may be defined as a liner function of query length:
1480         // L,0.0,0.6 ...
1481         if (s[0] == 'L') {
1482             list<string> tokens;
1483             NStr::Split(s, ",", tokens);
1484             vector<double> coeffs;
1485             if (tokens.size() < 3) {
1486                 NCBI_THROW(CInputException, eInvalidInput,
1487                            (string)"Incorrectly formatted score function: " +
1488                            s + ". It should be of the form 'L,b,a' for ax + b,"
1489                            "a, b must be numbers");
1490             }
1491             auto it = tokens.begin();
1492             ++it;
1493             try {
1494                 for (; it != tokens.end(); ++it) {
1495                     coeffs.push_back(NStr::StringToDouble(*it));
1496                 }
1497             }
1498             catch (CException& e) {
1499                 NCBI_THROW(CInputException, eInvalidInput,
1500                            (string)"Incorrectly formatted score function: " +
1501                            s + ". It should be of the form 'L,b,a' for ax + b,"
1502                            " a, b must be real numbers");
1503             }
1504             opt.SetCutoffScoreCoeffs(coeffs);
1505         }
1506         else {
1507             // ... or a numerical constant
1508             try {
1509                 opt.SetCutoffScore(NStr::StringToInt(s));
1510             }
1511             catch (CException&) {
1512                 NCBI_THROW(CInputException, eInvalidInput,
1513                            (string)"Incorrectly formatted score threshold: " +
1514                            s + ". It must be either an integer or a linear "
1515                            "function in the form: L,b,a for ax + b, a and b "
1516                            "must be real numbers");
1517             }
1518         }
1519     }
1520 
1521     if (args.Exist(kArgMaxEditDist) && args[kArgMaxEditDist]) {
1522         opt.SetMaxEditDistance(args[kArgMaxEditDist].AsInteger());
1523     }
1524 
1525     if (args.Exist(kArgSplice) && args[kArgSplice]) {
1526         opt.SetSpliceAlignments(args[kArgSplice].AsBoolean());
1527     }
1528 
1529     string ref_type = "genome";
1530     if (args.Exist(kArgRefType) && args[kArgRefType]) {
1531         ref_type = args[kArgRefType].AsString();
1532     }
1533 
1534     if (args.Exist(kArgLimitLookup) && args[kArgLimitLookup]) {
1535         opt.SetLookupDbFilter(args[kArgLimitLookup].AsBoolean());
1536     }
1537     else {
1538         opt.SetLookupDbFilter(ref_type == "genome");
1539     }
1540 
1541     if (args.Exist(kArgMaxDbWordCount) && args[kArgMaxDbWordCount]) {
1542         opt.SetMaxDbWordCount(args[kArgMaxDbWordCount].AsInteger());
1543     }
1544 
1545     if (args.Exist(kArgLookupStride) && args[kArgLookupStride]) {
1546         opt.SetLookupTableStride(args[kArgLookupStride].AsInteger());
1547     }
1548 }
1549 
1550 
1551 void
SetArgumentDescriptions(CArgDescriptions & arg_desc)1552 CIgBlastArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
1553 {
1554     arg_desc.SetCurrentGroup("Ig-BLAST options");
1555     const static char suffix[] = "VDJ";
1556     const static int df_num_align[3] = {3,3,3};
1557     int num_genes = (m_IsProtein) ? 1 : 3;
1558 
1559 
1560     for (int gene=0; gene<num_genes; ++gene) {
1561         // Subject sequence input
1562         /* TODO disabled for now
1563         string arg_sub = kArgGLSubject;
1564         arg_sub.push_back(suffix[gene]);
1565         arg_desc.AddOptionalKey(arg_sub , "filename",
1566                             "Germline subject sequence to align",
1567                             CArgDescriptions::eInputFile);
1568         */
1569         // Germline database file name
1570         string arg_db = kArgGLDatabase;
1571         arg_db.push_back(suffix[gene]);
1572         arg_desc.AddOptionalKey(arg_db, "germline_database_name",
1573                             "Germline database name",
1574                             CArgDescriptions::eString);
1575         //arg_desc.SetDependency(arg_db, CArgDescriptions::eExcludes, arg_sub);
1576         // Number of alignments to show
1577         string arg_na = kArgGLNumAlign;
1578         arg_na.push_back(suffix[gene]);
1579         arg_desc.AddDefaultKey(arg_na, "int_value",
1580                             "Number of Germline sequences to show alignments for",
1581                             CArgDescriptions::eInteger,
1582                             NStr::IntToString(df_num_align[gene]));
1583         //arg_desc.SetConstraint(arg_na,
1584         //                   new CArgAllowValuesBetween(0, 4));
1585         // Seqidlist
1586         arg_desc.AddOptionalKey(arg_db + "_seqidlist", "filename",
1587                             "Restrict search of germline database to list of SeqIds's",
1588                             CArgDescriptions::eString);
1589     }
1590 
1591     if (!m_IsProtein) {
1592         arg_desc.AddOptionalKey(kArgGLChainType, "filename",
1593                             "File containing the coding frame start positions for sequences in germline J database",
1594                             CArgDescriptions::eString);
1595 
1596         arg_desc.AddOptionalKey(kArgMinDMatch, "min_D_match",
1597                                 "Required minimal consecutive nucleotide base matches for D genes ",
1598                                 CArgDescriptions::eInteger);
1599         arg_desc.SetConstraint(kArgMinDMatch,
1600                                new CArgAllowValuesGreaterThanOrEqual(5));
1601 
1602         arg_desc.AddDefaultKey(kArgVPenalty, "V_penalty",
1603                                 "Penalty for a nucleotide mismatch in V gene",
1604                                 CArgDescriptions::eInteger, "-1");
1605         arg_desc.SetConstraint(kArgVPenalty,
1606                                new CArgAllowValuesBetween(-4, 0));
1607 
1608 
1609         arg_desc.AddDefaultKey(kArgDPenalty, "D_penalty",
1610                                 "Penalty for a nucleotide mismatch in D gene",
1611                                 CArgDescriptions::eInteger, "-2");
1612 
1613         arg_desc.SetConstraint(kArgDPenalty,
1614                                new CArgAllowValuesBetween(-5, 0));
1615 
1616         arg_desc.AddDefaultKey(kArgJPenalty, "J_penalty",
1617                                 "Penalty for a nucleotide mismatch in J gene",
1618                                 CArgDescriptions::eInteger, "-2");
1619 
1620         arg_desc.SetConstraint(kArgJPenalty,
1621                                new CArgAllowValuesBetween(-4, 0));
1622 
1623         arg_desc.AddDefaultKey(kArgNumClonotype, "num_clonotype",
1624                                 "Number of top clonotypes to show ",
1625                                CArgDescriptions::eInteger, "100");
1626         arg_desc.SetConstraint(kArgNumClonotype,
1627                                new CArgAllowValuesGreaterThanOrEqual(0));
1628 
1629         arg_desc.AddOptionalKey(kArgClonotypeFile, "clonotype_out",
1630                                 "Output file name for clonotype info",
1631                                CArgDescriptions::eOutputFile);
1632 
1633         arg_desc.AddFlag(kArgDetectOverlap, "Allow V(D)J genes to overlap.  This option is active only when D_penalty and J_penalty are set to -4 and -3, respectively", true);
1634 
1635 
1636     }
1637 
1638     arg_desc.AddDefaultKey(kArgGLOrigin, "germline_origin",
1639                            "The organism for your query sequence. Supported organisms include human, mouse, rat, rabbit and rhesus_monkey for Ig and human and mouse for TCR. Custom organism is also supported but you need to supply your own germline annotations (see IgBLAST web site for details)",
1640                            CArgDescriptions::eString, "human");
1641 
1642     arg_desc.AddDefaultKey(kArgGLDomainSystem, "domain_system",
1643                            "Domain system to be used for segment annotation",
1644                            CArgDescriptions::eString, "imgt");
1645     arg_desc.SetConstraint(kArgGLDomainSystem, &(*new CArgAllow_Strings, "kabat", "imgt"));
1646 
1647     arg_desc.AddDefaultKey(kArgIgSeqType, "sequence_type",
1648                             "Specify Ig or T cell receptor sequence",
1649                             CArgDescriptions::eString, "Ig");
1650     arg_desc.SetConstraint(kArgIgSeqType, &(*new CArgAllow_Strings, "Ig", "TCR"));
1651 
1652 
1653     arg_desc.AddFlag(kArgGLFocusV, "Should the search only be for V segment (effective only for non-germline database search using -db option)?", true);
1654 
1655     arg_desc.AddFlag(kArgExtendAlign5end, "Extend V gene alignment at 5' end", true);
1656 
1657     arg_desc.AddFlag(kArgExtendAlign3end, "Extend J gene alignment at 3' end", true);
1658 
1659     arg_desc.AddDefaultKey(kArgMinVLength, "Min_V_Length",
1660                            "Minimal required V gene length",
1661                            CArgDescriptions::eInteger, "9");
1662 
1663     arg_desc.SetConstraint(kArgMinVLength,
1664                            new CArgAllowValuesGreaterThanOrEqual(9));
1665 
1666     if (! m_IsProtein) {
1667         arg_desc.AddDefaultKey(kArgMinJLength, "Min_J_Length",
1668                                "Minimal required J gene length",
1669                                CArgDescriptions::eInteger, "0");
1670 
1671         arg_desc.SetConstraint(kArgMinJLength,
1672                                new CArgAllowValuesGreaterThanOrEqual(0));
1673     }
1674 
1675     if (! m_IsProtein) {
1676         arg_desc.AddFlag(kArgTranslate, "Show translated alignments", true);
1677     }
1678 
1679     arg_desc.SetCurrentGroup("");
1680 }
1681 
s_RegisterOMDataLoader(CRef<CSeqDB> db_handle)1682 static string s_RegisterOMDataLoader(CRef<CSeqDB> db_handle)
1683 {    // the blast formatter requires that the database coexist in
1684     // the same scope with the query sequences
1685     CRef<CObjectManager> om = CObjectManager::GetInstance();
1686     CBlastDbDataLoader::RegisterInObjectManager(*om, db_handle, true,
1687                         CObjectManager::eDefault,
1688                         CBlastDatabaseArgs::kSubjectsDataLoaderPriority);
1689     CBlastDbDataLoader::SBlastDbParam param(db_handle);
1690     string retval(CBlastDbDataLoader::GetLoaderNameFromArgs(param));
1691     _TRACE("Registering " << retval << " at priority " <<
1692            (int)CBlastDatabaseArgs::kSubjectsDataLoaderPriority);
1693     return retval;
1694 }
1695 
1696 void
ExtractAlgorithmOptions(const CArgs & args,CBlastOptions & opts)1697 CIgBlastArgs::ExtractAlgorithmOptions(const CArgs& args,
1698                                       CBlastOptions& opts)
1699 {
1700     string paths[3];
1701     CNcbiEnvironment env;
1702     paths[0] = CDirEntry::NormalizePath(CDir::GetCwd(), eFollowLinks);
1703     paths[1] = CDirEntry::NormalizePath(env.Get("IGDATA"), eFollowLinks);
1704     CNcbiApplication* app = CNcbiApplication::Instance();
1705     if (app) {
1706         const CNcbiRegistry& registry = app->GetConfig();
1707         paths[2] = CDirEntry::NormalizePath(registry.Get("BLAST","IGDATA"), eFollowLinks);
1708     } else {
1709 #if defined(NCBI_OS_DARWIN)
1710        paths[2] = "/usr/local/ncbi/igblast/data";
1711 #else
1712        paths[2] = paths[0];
1713 #endif
1714     }
1715 
1716     m_IgOptions.Reset(new CIgBlastOptions());
1717 
1718     CBlastDatabaseArgs::EMoleculeType mol_type = Blast_SubjectIsNucleotide(opts.GetProgramType())
1719         ? CSearchDatabase::eBlastDbIsNucleotide
1720         : CSearchDatabase::eBlastDbIsProtein;
1721 
1722     m_IgOptions->m_IsProtein = (mol_type == CSearchDatabase::eBlastDbIsProtein);
1723     m_IgOptions->m_Origin = args[kArgGLOrigin].AsString();
1724     m_IgOptions->m_DomainSystem = args[kArgGLDomainSystem].AsString();
1725     m_IgOptions->m_FocusV = args.Exist(kArgGLFocusV) ? args[kArgGLFocusV] : false;
1726     m_IgOptions->m_ExtendAlign5end = args.Exist(kArgExtendAlign5end) ? args[kArgExtendAlign5end] : false;
1727     m_IgOptions->m_ExtendAlign3end = args.Exist(kArgExtendAlign3end) ? args[kArgExtendAlign3end] : false;
1728     m_IgOptions->m_DetectOverlap = args.Exist(kArgDetectOverlap) ? args[kArgDetectOverlap] : false;
1729     m_IgOptions->m_MinVLength = args[kArgMinVLength].AsInteger();
1730     if (args.Exist(kArgMinJLength) && args[kArgMinJLength]) {
1731         m_IgOptions->m_MinJLength = args[kArgMinJLength].AsInteger();
1732     } else {
1733         m_IgOptions->m_MinJLength = 0;
1734     }
1735     m_IgOptions->m_Translate = args.Exist(kArgTranslate) ? args[kArgTranslate] : false;
1736     if (!m_IsProtein) {
1737         string aux_file = (args.Exist(kArgGLChainType) && args[kArgGLChainType])
1738                              ? args[kArgGLChainType].AsString()
1739                              : m_IgOptions->m_Origin + "_gl.aux";
1740         m_IgOptions->m_AuxFilename = aux_file;
1741         for (int i=0; i<3; i++) {
1742             string aux_path = CDirEntry::ConcatPath(paths[i], aux_file);
1743             CDirEntry entry(aux_path);
1744             if (entry.Exists() && entry.IsFile()) {
1745                 m_IgOptions->m_AuxFilename = aux_path;
1746                 break;
1747             }
1748         }
1749     }
1750 
1751     _ASSERT(m_IsProtein == m_IgOptions->m_IsProtein);
1752 
1753     m_Scope.Reset(new CScope(*CObjectManager::GetInstance()));
1754 
1755     // default germline database name for annotation
1756     for (int i=0; i<3; i++) {
1757         string int_data = CDirEntry::ConcatPath(paths[i], "internal_data");
1758         CDirEntry entry(int_data);
1759         if (entry.Exists() && entry.IsDir()) {
1760             m_IgOptions->m_IgDataPath = int_data;
1761             break;
1762         }
1763     }
1764 
1765     m_IgOptions->m_SequenceType = "Ig";
1766     if (args.Exist(kArgIgSeqType) && args[kArgIgSeqType]) {
1767         m_IgOptions->m_SequenceType = args[kArgIgSeqType].AsString();
1768     }
1769 
1770     string df_db_name = CDirEntry::ConcatPath(
1771                         CDirEntry::ConcatPath(m_IgOptions->m_IgDataPath,
1772                            m_IgOptions->m_Origin), m_IgOptions->m_Origin +
1773                         ((m_IgOptions->m_SequenceType == "TCR")?"_TR":"") + "_V");
1774     CRef<CSearchDatabase> db(new CSearchDatabase(df_db_name, mol_type));
1775     m_IgOptions->m_Db[3].Reset(new CLocalDbAdapter(*db));
1776     try {
1777         db->GetSeqDb();
1778     } catch(...) {
1779         NCBI_THROW(CInputException, eInvalidInput,
1780            "Germline annotation database " + df_db_name + " could not be found in [internal_data] directory");
1781     }
1782 
1783     m_IgOptions->m_Min_D_match = 5;
1784     if (args.Exist(kArgMinDMatch) && args[kArgMinDMatch]) {
1785         m_IgOptions->m_Min_D_match = args[kArgMinDMatch].AsInteger();
1786     }
1787 
1788    if (args.Exist(kArgVPenalty) && args[kArgVPenalty]) {
1789         m_IgOptions->m_V_penalty = args[kArgVPenalty].AsInteger();
1790    }
1791 
1792     if (args.Exist(kArgDPenalty) && args[kArgDPenalty]) {
1793         m_IgOptions->m_D_penalty = args[kArgDPenalty].AsInteger();
1794     }
1795 
1796     if (args.Exist(kArgJPenalty) && args[kArgJPenalty]) {
1797         m_IgOptions->m_J_penalty = args[kArgJPenalty].AsInteger();
1798     }
1799 
1800     CRef<CBlastOptionsHandle> opts_hndl;
1801     if (m_IgOptions->m_IsProtein) {
1802         opts_hndl.Reset(CBlastOptionsFactory::Create(eBlastp));
1803     } else {
1804         opts_hndl.Reset(CBlastOptionsFactory::Create(eBlastn));
1805     }
1806 
1807     const static char suffix[] = "VDJ";
1808     int num_genes = (m_IsProtein) ? 1: 3;
1809     for (int gene=0; gene< num_genes; ++gene) {
1810         string arg_sub = kArgGLSubject;
1811         string arg_db  = kArgGLDatabase;
1812         string arg_na  = kArgGLNumAlign;
1813 
1814         arg_sub.push_back(suffix[gene]);
1815         arg_db.push_back(suffix[gene]);
1816         arg_na.push_back(suffix[gene]);
1817 
1818         m_IgOptions->m_NumAlign[gene] = args[arg_na].AsInteger();
1819 
1820         if (args.Exist(arg_sub) && args[arg_sub]) {
1821             CNcbiIstream& subj_input_stream = args[arg_sub].AsInputFile();
1822             TSeqRange subj_range;
1823 
1824             const bool parse_deflines = args.Exist(kArgParseDeflines)
1825 	        ? bool(args[kArgParseDeflines])
1826                 : kDfltArgParseDeflines;
1827             const bool use_lcase_masks = args.Exist(kArgUseLCaseMasking)
1828 	        ? bool(args[kArgUseLCaseMasking])
1829                 : kDfltArgUseLCaseMasking;
1830             CRef<blast::CBlastQueryVector> subjects;
1831             CRef<CScope> scope = ReadSequencesToBlast(subj_input_stream,
1832                                        m_IgOptions->m_IsProtein,
1833                                        subj_range, parse_deflines,
1834                                        use_lcase_masks, subjects);
1835             m_Scope->AddScope(*scope,
1836                         CBlastDatabaseArgs::kSubjectsDataLoaderPriority);
1837             CRef<IQueryFactory> sub_seqs(
1838                                 new blast::CObjMgr_QueryFactory(*subjects));
1839             m_IgOptions->m_Db[gene].Reset(new CLocalDbAdapter(
1840                                        sub_seqs, opts_hndl));
1841         } else {
1842             string gl_db_name = m_IgOptions->m_Origin + "_gl_";
1843             gl_db_name.push_back(suffix[gene]);
1844             string db_name = (args.Exist(arg_db) && args[arg_db])
1845                        ?  args[arg_db].AsString() :  gl_db_name;
1846             db.Reset(new CSearchDatabase(db_name, mol_type));
1847 
1848             if (args.Exist(arg_db + "_seqidlist") && args[arg_db + "_seqidlist"]) {
1849                 string fn(SeqDB_ResolveDbPath(args[arg_db + "_seqidlist"].AsString()));
1850                 db->SetGiList(CRef<CSeqDBGiList> (new CSeqDBFileGiList(fn,
1851                              CSeqDBFileGiList::eSiList)));
1852             }
1853 
1854             m_IgOptions->m_Db[gene].Reset(new CLocalDbAdapter(*db));
1855             m_Scope->AddDataLoader(s_RegisterOMDataLoader(db->GetSeqDb()));
1856         }
1857     }
1858 }
1859 
1860 void
SetArgumentDescriptions(CArgDescriptions & arg_desc)1861 CQueryOptionsArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
1862 {
1863 
1864     arg_desc.SetCurrentGroup("Query filtering options");
1865     // lowercase masking
1866     arg_desc.AddFlag(kArgUseLCaseMasking,
1867          "Use lower case filtering in query and subject sequence(s)?", true);
1868 
1869     arg_desc.SetCurrentGroup("Input query options");
1870     // query location
1871     arg_desc.AddOptionalKey(kArgQueryLocation, "range",
1872                             "Location on the query sequence in 1-based offsets "
1873                             "(Format: start-stop)",
1874                             CArgDescriptions::eString);
1875 
1876     if ( !m_QueryCannotBeNucl) {
1877         // search strands
1878         arg_desc.AddDefaultKey(kArgStrand, "strand",
1879                      "Query strand(s) to search against database/subject",
1880                                CArgDescriptions::eString, kDfltArgStrand);
1881         arg_desc.SetConstraint(kArgStrand, &(*new CArgAllow_Strings,
1882                                              kDfltArgStrand, "plus", "minus"));
1883     }
1884 
1885     arg_desc.SetCurrentGroup("Miscellaneous options");
1886     arg_desc.AddFlag(kArgParseDeflines,
1887                  "Should the query and subject defline(s) be parsed?", true);
1888 
1889     arg_desc.SetCurrentGroup("");
1890 }
1891 
1892 void
ExtractAlgorithmOptions(const CArgs & args,CBlastOptions & opt)1893 CQueryOptionsArgs::ExtractAlgorithmOptions(const CArgs& args,
1894                                            CBlastOptions& opt)
1895 {
1896     // Get the strand
1897     {
1898         m_Strand = eNa_strand_unknown;
1899 
1900         if (!Blast_QueryIsProtein(opt.GetProgramType())) {
1901 
1902             if (args.Exist(kArgStrand) && args[kArgStrand]) {
1903                 const string& kStrand = args[kArgStrand].AsString();
1904                 if (kStrand == "both") {
1905                     m_Strand = eNa_strand_both;
1906                 } else if (kStrand == "plus") {
1907                     m_Strand = eNa_strand_plus;
1908                 } else if (kStrand == "minus") {
1909                     m_Strand = eNa_strand_minus;
1910                 } else {
1911                     abort();
1912                 }
1913             }
1914             else {
1915                 m_Strand = eNa_strand_both;
1916             }
1917         }
1918     }
1919 
1920     // set the sequence range
1921     if (args.Exist(kArgQueryLocation) && args[kArgQueryLocation]) {
1922         m_Range = ParseSequenceRange(args[kArgQueryLocation].AsString(),
1923                                      "Invalid specification of query location");
1924     }
1925 
1926     m_UseLCaseMask = args.Exist(kArgUseLCaseMasking) &&
1927         static_cast<bool>(args[kArgUseLCaseMasking]);
1928     m_ParseDeflines = args.Exist(kArgParseDeflines) &&
1929         static_cast<bool>(args[kArgParseDeflines]);
1930 }
1931 
1932 void
SetArgumentDescriptions(CArgDescriptions & arg_desc)1933 CMapperQueryOptionsArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
1934 {
1935 
1936     arg_desc.SetCurrentGroup("Query filtering options");
1937     // lowercase masking
1938     arg_desc.AddFlag(kArgUseLCaseMasking,
1939                      "Use lower case filtering in subject sequence(s)?", true);
1940     arg_desc.AddDefaultKey(kArgQualityFilter, "TF", "Reject low quality "
1941                            "sequences ", CArgDescriptions::eBoolean, "true");
1942 
1943     arg_desc.SetCurrentGroup("Input query options");
1944     arg_desc.AddDefaultKey(kArgInputFormat, "format", "Input format for "
1945                            "sequences", CArgDescriptions::eString, "fasta");
1946     arg_desc.SetConstraint(kArgInputFormat, &(*new CArgAllow_Strings,
1947                                               "fasta", "fastc", "fastq",
1948                                               "asn1", "asn1b"));
1949     arg_desc.AddFlag(kArgPaired, "Input query sequences are paired", true);
1950     arg_desc.AddOptionalKey(kArgQueryMate, "infile", "FASTA file with "
1951                             "mates for query sequences (if given in "
1952                             "another file)", CArgDescriptions::eInputFile);
1953     arg_desc.SetDependency(kArgQueryMate, CArgDescriptions::eRequires,
1954                            kArgQuery);
1955 
1956     arg_desc.AddOptionalKey(kArgSraAccession, "accession",
1957                             "Comma-separated SRA accessions",
1958                             CArgDescriptions::eString);
1959     arg_desc.SetDependency(kArgSraAccession, CArgDescriptions::eExcludes,
1960                            kArgQuery);
1961     arg_desc.SetDependency(kArgSraAccession, CArgDescriptions::eExcludes,
1962                           kArgInputFormat);
1963 
1964     arg_desc.AddOptionalKey(kArgSraAccessionBatch, "file",
1965                             "File with a list of SRA accessions, one per line",
1966                             CArgDescriptions::eInputFile);
1967     arg_desc.SetDependency(kArgSraAccessionBatch, CArgDescriptions::eExcludes,
1968                            kArgSraAccession);
1969     arg_desc.SetDependency(kArgSraAccessionBatch, CArgDescriptions::eExcludes,
1970                            kArgQuery);
1971     arg_desc.SetDependency(kArgSraAccessionBatch, CArgDescriptions::eExcludes,
1972                            kArgInputFormat);
1973 
1974     arg_desc.SetCurrentGroup("Miscellaneous options");
1975     arg_desc.AddDefaultKey(kArgParseDeflines, "TF", "Should the query and "
1976                            "subject defline(s) be parsed?",
1977                            CArgDescriptions::eBoolean, "true");
1978 
1979     arg_desc.AddFlag(kArgEnableSraCache, "Enable SRA caching in local files");
1980     arg_desc.SetDependency(kArgEnableSraCache, CArgDescriptions::eRequires,
1981                            kArgSraAccession);
1982 
1983 
1984     arg_desc.SetCurrentGroup("");
1985 }
1986 
1987 void
ExtractAlgorithmOptions(const CArgs & args,CBlastOptions & opt)1988 CMapperQueryOptionsArgs::ExtractAlgorithmOptions(const CArgs& args,
1989                                                  CBlastOptions& opt)
1990 {
1991     CQueryOptionsArgs::ExtractAlgorithmOptions(args, opt);
1992 
1993     if (args.Exist(kArgPaired) && args[kArgPaired]) {
1994         opt.SetPaired(true);
1995         m_IsPaired = true;
1996     }
1997 
1998     if (args.Exist(kArgInputFormat) && args[kArgInputFormat]) {
1999         if (args[kArgInputFormat].AsString() == "fasta") {
2000             m_InputFormat = eFasta;
2001         }
2002         else if (args[kArgInputFormat].AsString() == "fastc") {
2003             m_InputFormat = eFastc;
2004         }
2005         else if (args[kArgInputFormat].AsString() == "fastq") {
2006             m_InputFormat = eFastq;
2007         }
2008         else if (args[kArgInputFormat].AsString() == "asn1") {
2009             m_InputFormat = eASN1text;
2010         }
2011         else if (args[kArgInputFormat].AsString() == "asn1b") {
2012             m_InputFormat = eASN1bin;
2013         }
2014         else {
2015             NCBI_THROW(CInputException, eInvalidInput,
2016                        "Unexpected input format: " +
2017                        args[kArgInputFormat].AsString());
2018         }
2019     }
2020 
2021     if (m_InputFormat == eFastc) {
2022         // FASTC format always has pairs in a single file
2023         opt.SetPaired(true);
2024         m_IsPaired = true;
2025     }
2026 
2027     if (args.Exist(kArgQualityFilter) && args[kArgQualityFilter]) {
2028         opt.SetReadQualityFiltering(args[kArgQualityFilter].AsBoolean());
2029     }
2030 
2031     if (args.Exist(kArgQueryMate) && args[kArgQueryMate]) {
2032         // create a decompress stream is the file is compressed
2033         // (the primary query file is handeled by CStdCmdLieArgs object)
2034         if (NStr::EndsWith(args[kArgQueryMate].AsString(), ".gz",
2035                            NStr::eNocase)) {
2036             m_DecompressIStream.reset(new CDecompressIStream(
2037                                             args[kArgQueryMate].AsInputFile(),
2038                                             CDecompressIStream::eGZipFile));
2039             m_MateInputStream = m_DecompressIStream.get();
2040         }
2041         else {
2042             m_MateInputStream = &args[kArgQueryMate].AsInputFile();
2043         }
2044 
2045         // queries have pairs in the mate stream
2046         opt.SetPaired(true);
2047         m_IsPaired = true;
2048     }
2049 
2050     if ((args.Exist(kArgSraAccession) && args[kArgSraAccession]) ||
2051         (args.Exist(kArgSraAccessionBatch) && args[kArgSraAccessionBatch])) {
2052 
2053         if (args[kArgSraAccession]) {
2054             // accessions given in the command-line
2055             NStr::Split((CTempString)args[kArgSraAccession].AsString(), ",",
2056                         m_SraAccessions);
2057         }
2058         else {
2059             // accessions given in a file
2060             while (!args[kArgSraAccessionBatch].AsInputFile().eof()) {
2061                 string line;
2062                 args[kArgSraAccessionBatch].AsInputFile() >> line;
2063                 if (!line.empty()) {
2064                     m_SraAccessions.push_back(line);
2065                 }
2066             }
2067         }
2068 
2069         if (m_SraAccessions.empty()) {
2070             NCBI_THROW(CInputException, eInvalidInput,
2071                        "No SRA accessions provided");
2072         }
2073 
2074         m_InputFormat = eSra;
2075         // assume SRA input is paired, that information for each read is in
2076         // SRA database, this option will trigger checking for pairs
2077         opt.SetPaired(true);
2078         m_IsPaired = true;
2079     }
2080 
2081     if (args.Exist(kArgEnableSraCache) && args[kArgEnableSraCache]) {
2082         m_EnableSraCache = true;
2083     }
2084 }
2085 
2086 
2087 
CBlastDatabaseArgs(bool request_mol_type,bool is_rpsblast,bool is_igblast,bool is_mapper,bool is_kblast)2088 CBlastDatabaseArgs::CBlastDatabaseArgs(bool request_mol_type /* = false */,
2089                                        bool is_rpsblast /* = false */,
2090                                        bool is_igblast  /* = false */,
2091                                        bool is_mapper   /* = false */,
2092                                        bool is_kblast   /* = false */)
2093     : m_RequestMoleculeType(request_mol_type),
2094       m_IsRpsBlast(is_rpsblast),
2095       m_IsIgBlast(is_igblast),
2096       m_IsProtein(true),
2097       m_IsMapper(is_mapper),
2098       m_IsKBlast(is_kblast),
2099       m_SupportsDatabaseMasking(false),
2100       m_SupportIPGFiltering(false)
2101 {}
2102 
2103 bool
HasBeenSet(const CArgs & args)2104 CBlastDatabaseArgs::HasBeenSet(const CArgs& args)
2105 {
2106     if ( (args.Exist(kArgDb) && args[kArgDb].HasValue()) ||
2107          (args.Exist(kArgSubject) && args[kArgSubject].HasValue()) ) {
2108         return true;
2109     }
2110     return false;
2111 }
2112 
2113 void
SetArgumentDescriptions(CArgDescriptions & arg_desc)2114 CBlastDatabaseArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
2115 {
2116     arg_desc.SetCurrentGroup("General search options");
2117     // database filename
2118     if (m_IsIgBlast){
2119         arg_desc.AddOptionalKey(kArgDb, "database_name", "Optional additional database name",
2120                                 CArgDescriptions::eString);
2121     } else {
2122         arg_desc.AddOptionalKey(kArgDb, "database_name", "BLAST database name",
2123                                 CArgDescriptions::eString);
2124     }
2125 
2126     arg_desc.SetCurrentGroup("");
2127 
2128     if (m_RequestMoleculeType) {
2129         arg_desc.AddKey(kArgDbType, "database_type",
2130                         "BLAST database molecule type",
2131                         CArgDescriptions::eString);
2132         arg_desc.SetConstraint(kArgDbType,
2133                                &(*new CArgAllow_Strings, "prot", "nucl"));
2134     }
2135 
2136     vector<string> database_args;
2137     database_args.push_back(kArgDb);
2138     database_args.push_back(kArgGiList);
2139     database_args.push_back(kArgSeqIdList);
2140     database_args.push_back(kArgNegativeGiList);
2141     database_args.push_back(kArgNegativeSeqidList);
2142     database_args.push_back(kArgTaxIdList);
2143     database_args.push_back(kArgTaxIdListFile);
2144     database_args.push_back(kArgNegativeTaxIdList);
2145     database_args.push_back(kArgNegativeTaxIdListFile);
2146     if (m_SupportIPGFiltering) {
2147     	database_args.push_back(kArgIpgList);
2148     	database_args.push_back(kArgNegativeIpgList);
2149     }
2150     if (m_SupportsDatabaseMasking) {
2151         database_args.push_back(kArgDbSoftMask);
2152         database_args.push_back(kArgDbHardMask);
2153     }
2154 
2155     // DB size
2156     if (!m_IsMapper) {
2157         arg_desc.SetCurrentGroup("Statistical options");
2158         arg_desc.AddOptionalKey(kArgDbSize, "num_letters",
2159                                 "Effective length of the database ",
2160                                 CArgDescriptions::eInt8);
2161     }
2162 
2163     arg_desc.SetCurrentGroup("Restrict search or results");
2164     // GI list
2165     if (!m_IsRpsBlast && !m_IsIgBlast) {
2166         arg_desc.AddOptionalKey(kArgGiList, "filename",
2167                                 "Restrict search of database to list of GIs",
2168                                 CArgDescriptions::eString);
2169         // SeqId list
2170         arg_desc.AddOptionalKey(kArgSeqIdList, "filename",
2171                                 "Restrict search of database to list of SeqIDs",
2172                                 CArgDescriptions::eString);
2173         // Negative GI list
2174         arg_desc.AddOptionalKey(kArgNegativeGiList, "filename",
2175                                 "Restrict search of database to everything"
2176                                 " except the specified GIs",
2177                                 CArgDescriptions::eString);
2178 
2179         // Negative SeqId list
2180         arg_desc.AddOptionalKey(kArgNegativeSeqidList, "filename",
2181                                 "Restrict search of database to everything"
2182                                 " except the specified SeqIDs",
2183                                 CArgDescriptions::eString);
2184 
2185         // Tax ID list
2186         arg_desc.AddOptionalKey(kArgTaxIdList, "taxids",
2187                                 "Restrict search of database to include only "
2188                                 "the specified taxonomy IDs "
2189                                 "(multiple IDs delimited by ',')",
2190                                 CArgDescriptions::eString);
2191         arg_desc.AddOptionalKey(kArgNegativeTaxIdList, "taxids",
2192                                 "Restrict search of database to everything "
2193                                 "except the specified taxonomy IDs "
2194                                 "(multiple IDs delimited by ',')",
2195                                 CArgDescriptions::eString);
2196         // Tax ID list file
2197         arg_desc.AddOptionalKey(kArgTaxIdListFile, "filename",
2198                                 "Restrict search of database to include only "
2199                                 "the specified taxonomy IDs",
2200                                 CArgDescriptions::eString);
2201         arg_desc.AddOptionalKey(kArgNegativeTaxIdListFile, "filename",
2202                                 "Restrict search of database to everything "
2203                                 "except the specified taxonomy IDs",
2204                                 CArgDescriptions::eString);
2205 
2206         if (m_SupportIPGFiltering) {
2207             arg_desc.AddOptionalKey(kArgIpgList, "filename",
2208                                 "Restrict search of database to list of IPGs",
2209                                 CArgDescriptions::eString);
2210 
2211             // Negative IPG list
2212             arg_desc.AddOptionalKey(kArgNegativeIpgList, "filename",
2213                                 "Restrict search of database to everything"
2214                                 " except the specified IPGs",
2215                                 CArgDescriptions::eString);
2216         }
2217         // N.B.: all restricting options are mutually exclusive
2218         const vector<string> kBlastDBFilteringOptions = {
2219             kArgGiList,
2220             kArgSeqIdList,
2221             kArgTaxIdList,
2222             kArgTaxIdListFile,
2223 
2224             kArgNegativeGiList,
2225             kArgNegativeSeqidList,
2226             kArgNegativeTaxIdList,
2227             kArgNegativeTaxIdListFile,
2228 
2229         };
2230         for (size_t i = 0; i < kBlastDBFilteringOptions.size(); i++) {
2231             for (size_t j = i+1; j < kBlastDBFilteringOptions.size(); j++) {
2232                 arg_desc.SetDependency(kBlastDBFilteringOptions[i], CArgDescriptions::eExcludes,
2233                                        kBlastDBFilteringOptions[j]);
2234             }
2235         }
2236 
2237         // For now, disable pairing -remote with either -gilist or
2238         // -negative_gilist as this is not implemented in the BLAST server
2239         for (const string& s: kBlastDBFilteringOptions) {
2240             arg_desc.SetDependency(kArgRemote, CArgDescriptions::eExcludes, s);
2241         }
2242     }
2243 
2244     // Entrez Query
2245     if (!m_IsMapper) {
2246         arg_desc.AddOptionalKey(kArgEntrezQuery, "entrez_query",
2247                                 "Restrict search with the given Entrez query",
2248                                 CArgDescriptions::eString);
2249 
2250         // Entrez query currently requires the -remote option
2251         arg_desc.SetDependency(kArgEntrezQuery, CArgDescriptions::eRequires,
2252                                kArgRemote);
2253     }
2254 
2255 
2256 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION  > 550)) && \
2257      (!defined(NCBI_COMPILER_MIPSPRO)) )
2258     // Masking of database
2259     if (m_SupportsDatabaseMasking) {
2260         arg_desc.AddOptionalKey(kArgDbSoftMask,
2261                 "filtering_algorithm",
2262                 "Filtering algorithm ID to apply to the BLAST database as soft "
2263                 "masking",
2264                 CArgDescriptions::eString);
2265         arg_desc.SetDependency(kArgDbSoftMask, CArgDescriptions::eExcludes,
2266                            kArgDbHardMask);
2267 
2268         arg_desc.AddOptionalKey(kArgDbHardMask,
2269                 "filtering_algorithm",
2270                 "Filtering algorithm ID to apply to the BLAST database as hard "
2271                 "masking",
2272                 CArgDescriptions::eString);
2273     }
2274 #endif
2275 
2276     // There is no RPS-BLAST 2 sequences
2277     if ( !m_IsRpsBlast && !m_IsKBlast && !m_IsIgBlast) {
2278         arg_desc.SetCurrentGroup("BLAST-2-Sequences options");
2279         // subject sequence input (for bl2seq)
2280         arg_desc.AddOptionalKey(kArgSubject, "subject_input_file",
2281                                 "Subject sequence(s) to search",
2282                                 CArgDescriptions::eInputFile);
2283         ITERATE(vector<string>, dbarg, database_args) {
2284             arg_desc.SetDependency(kArgSubject, CArgDescriptions::eExcludes,
2285                                    *dbarg);
2286         }
2287 
2288         // subject location
2289         arg_desc.AddOptionalKey(kArgSubjectLocation, "range",
2290                         "Location on the subject sequence in 1-based offsets "
2291                         "(Format: start-stop)",
2292                         CArgDescriptions::eString);
2293         ITERATE(vector<string>, dbarg, database_args) {
2294             arg_desc.SetDependency(kArgSubjectLocation,
2295                                    CArgDescriptions::eExcludes,
2296                                    *dbarg);
2297         }
2298         // Because Blast4-subject does not support Seq-locs, specifying a
2299         // subject range does not work for remote searches
2300         arg_desc.SetDependency(kArgSubjectLocation,
2301                                CArgDescriptions::eExcludes, kArgRemote);
2302     }
2303 
2304     arg_desc.SetCurrentGroup("");
2305 }
2306 
2307 
2308 
2309 
s_GetTaxIDList(const string & in,bool isFile,bool isNegativeList,CRef<CSearchDatabase> & sdb)2310 static void s_GetTaxIDList(const string & in, bool isFile, bool isNegativeList, CRef<CSearchDatabase> & sdb)
2311 {
2312 	vector<string> ids;
2313 	if (isFile) {
2314         string filename(SeqDB_ResolveDbPath(in));
2315         if(filename == kEmptyStr) {
2316         	NCBI_THROW(CInputException, eInvalidInput,
2317         			   "File is not acessible: "+ in );
2318         }
2319         CNcbiIfstream instream(filename.c_str());
2320         CStreamLineReader reader(instream);
2321         while (!reader.AtEOF()) {
2322             reader.ReadLine();
2323             ids.push_back(reader.GetCurrentLine());
2324         }
2325     }
2326 	else {
2327         NStr::Split(in, ",", ids, NStr::fSplit_Tokenize);
2328     }
2329 
2330 	set<TTaxId> tax_ids;
2331     for(unsigned int i=0; i < ids.size(); i++) {
2332     	try {
2333     		if(NStr::IsBlank(ids[i])){
2334     			continue;
2335     		}
2336     		tax_ids.insert(NStr::StringToNumeric<TTaxId>(ids[i], NStr::fAllowLeadingSpaces | NStr::fAllowTrailingSpaces));
2337     	}
2338     	catch(CException &){
2339     		NCBI_THROW(CInputException, eInvalidInput, "Invalid taxidlist file ");
2340     	}
2341     }
2342 
2343    	CRef<CSeqDBGiList> taxid_list(new CSeqDBGiList());
2344     taxid_list->AddTaxIds(tax_ids);
2345     if(isNegativeList) {
2346         sdb->SetNegativeGiList(taxid_list.GetPointer());
2347     }
2348     else {
2349         sdb->SetGiList(taxid_list.GetPointer());
2350     }
2351 
2352 }
2353 
2354 
2355 void
ExtractAlgorithmOptions(const CArgs & args,CBlastOptions & opts)2356 CBlastDatabaseArgs::ExtractAlgorithmOptions(const CArgs& args,
2357                                             CBlastOptions& opts)
2358 {
2359     EMoleculeType mol_type = Blast_SubjectIsNucleotide(opts.GetProgramType())
2360         ? CSearchDatabase::eBlastDbIsNucleotide
2361         : CSearchDatabase::eBlastDbIsProtein;
2362     m_IsProtein = (mol_type == CSearchDatabase::eBlastDbIsProtein);
2363 
2364     if (args.Exist(kArgDb) && args[kArgDb]) {
2365 
2366         m_SearchDb.Reset(new CSearchDatabase(args[kArgDb].AsString(),
2367                                              mol_type));
2368 
2369         if (args.Exist(kArgGiList) && args[kArgGiList]) {
2370             string fn(SeqDB_ResolveDbPath(args[kArgGiList].AsString()));
2371             m_SearchDb->SetGiList(CRef<CSeqDBGiList> (new CSeqDBFileGiList(fn)));
2372 
2373         } else if (args.Exist(kArgNegativeGiList) && args[kArgNegativeGiList]) {
2374             string fn(SeqDB_ResolveDbPath(args[kArgNegativeGiList].AsString()));
2375             m_SearchDb->SetNegativeGiList(CRef<CSeqDBGiList> (new CSeqDBFileGiList(fn)));
2376 
2377         } else if (args.Exist(kArgSeqIdList) && args[kArgSeqIdList]) {
2378             string fn(SeqDB_ResolveDbPath(args[kArgSeqIdList].AsString()));
2379             m_SearchDb->SetGiList(CRef<CSeqDBGiList> (new CSeqDBFileGiList(fn,
2380                              CSeqDBFileGiList::eSiList)));
2381         } else if (args.Exist(kArgNegativeSeqidList) && args[kArgNegativeSeqidList]) {
2382             string fn(SeqDB_ResolveDbPath(args[kArgNegativeSeqidList].AsString()));
2383             m_SearchDb->SetNegativeGiList(CRef<CSeqDBGiList> (new CSeqDBFileGiList(fn,CSeqDBFileGiList::eSiList)));
2384         } else if (args.Exist(kArgTaxIdList) && args[kArgTaxIdList]) {
2385         	s_GetTaxIDList(args[kArgTaxIdList].AsString(), false, false, m_SearchDb);
2386 
2387         } else if (args.Exist(kArgTaxIdListFile) && args[kArgTaxIdListFile]) {
2388         	s_GetTaxIDList(args[kArgTaxIdListFile].AsString(), true, false, m_SearchDb);
2389 
2390         } else if (args.Exist(kArgNegativeTaxIdList) && args[kArgNegativeTaxIdList]) {
2391         	s_GetTaxIDList(args[kArgNegativeTaxIdList].AsString(), false, true, m_SearchDb);
2392 
2393         } else if (args.Exist(kArgNegativeTaxIdListFile) && args[kArgNegativeTaxIdListFile]) {
2394         	s_GetTaxIDList(args[kArgNegativeTaxIdListFile].AsString(), true, true, m_SearchDb);
2395 
2396         } else if (args.Exist(kArgIpgList) && args[kArgIpgList]) {
2397             string fn(SeqDB_ResolveDbPath(args[kArgIpgList].AsString()));
2398             m_SearchDb->SetGiList(CRef<CSeqDBGiList> (new CSeqDBFileGiList(fn, CSeqDBFileGiList::ePigList)));
2399         }  else if (args.Exist(kArgNegativeIpgList) && args[kArgNegativeIpgList]) {
2400             string fn(SeqDB_ResolveDbPath(args[kArgNegativeIpgList].AsString()));
2401             m_SearchDb->SetNegativeGiList(CRef<CSeqDBGiList> (new CSeqDBFileGiList(fn, CSeqDBFileGiList::ePigList)));
2402 
2403         }
2404 
2405         if (args.Exist(kArgEntrezQuery) && args[kArgEntrezQuery])
2406             m_SearchDb->SetEntrezQueryLimitation(args[kArgEntrezQuery].AsString());
2407 
2408 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION  > 550)) && \
2409      (!defined(NCBI_COMPILER_MIPSPRO)) )
2410         if (args.Exist(kArgDbSoftMask) && args[kArgDbSoftMask]) {
2411             m_SearchDb->SetFilteringAlgorithm(args[kArgDbSoftMask].AsString(), eSoftSubjMasking);
2412         } else if (args.Exist(kArgDbHardMask) && args[kArgDbHardMask]) {
2413             m_SearchDb->SetFilteringAlgorithm(args[kArgDbHardMask].AsString(), eHardSubjMasking);
2414         }
2415 #endif
2416     } else if (args.Exist(kArgSubject) && args[kArgSubject]) {
2417 
2418         CNcbiIstream* subj_input_stream = NULL;
2419         unique_ptr<CDecompressIStream> decompress_stream;
2420         if (m_IsMapper &&
2421             NStr::EndsWith(args[kArgSubject].AsString(), ".gz", NStr::eNocase)) {
2422             decompress_stream.reset(
2423                         new CDecompressIStream(args[kArgSubject].AsInputFile(),
2424                         CDecompressIStream::eGZipFile));
2425             subj_input_stream = decompress_stream.get();
2426         }
2427         else {
2428             subj_input_stream = &args[kArgSubject].AsInputFile();
2429         }
2430 
2431         TSeqRange subj_range;
2432         if (args.Exist(kArgSubjectLocation) && args[kArgSubjectLocation]) {
2433             subj_range =
2434                 ParseSequenceRange(args[kArgSubjectLocation].AsString(),
2435                             "Invalid specification of subject location");
2436         }
2437 
2438         const bool parse_deflines = args.Exist(kArgParseDeflines)
2439             ? args[kArgParseDeflines].AsBoolean()
2440             : kDfltArgParseDeflines;
2441         const bool use_lcase_masks = args.Exist(kArgUseLCaseMasking)
2442 	    ? bool(args[kArgUseLCaseMasking])
2443             : kDfltArgUseLCaseMasking;
2444         CRef<blast::CBlastQueryVector> subjects;
2445         m_Scope = ReadSequencesToBlast(*subj_input_stream, IsProtein(),
2446                                        subj_range, parse_deflines,
2447                                        use_lcase_masks, subjects, m_IsMapper);
2448         m_Subjects.Reset(new blast::CObjMgr_QueryFactory(*subjects));
2449 
2450     } else if (!m_IsIgBlast){
2451         // IgBlast permits use of germline database
2452         NCBI_THROW(CInputException, eInvalidInput,
2453            "Either a BLAST database or subject sequence(s) must be specified");
2454     }
2455 
2456     if (opts.GetEffectiveSearchSpace() != 0) {
2457         // no need to set any other options, as this trumps them
2458         return;
2459     }
2460 
2461     if (args.Exist(kArgDbSize) && args[kArgDbSize]) {
2462         opts.SetDbLength(args[kArgDbSize].AsInt8());
2463     }
2464 
2465 }
2466 
2467 void
SetArgumentDescriptions(CArgDescriptions & arg_desc)2468 CFormattingArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
2469 {
2470     arg_desc.SetCurrentGroup("Formatting options");
2471 
2472     string kOutputFormatDescription = string(
2473     "alignment view options:\n"
2474     "  0 = Pairwise,\n"
2475     "  1 = Query-anchored showing identities,\n"
2476     "  2 = Query-anchored no identities,\n"
2477     "  3 = Flat query-anchored showing identities,\n"
2478     "  4 = Flat query-anchored no identities,\n"
2479     "  5 = BLAST XML,\n"
2480     "  6 = Tabular,\n"
2481     "  7 = Tabular with comment lines,\n"
2482     "  8 = Seqalign (Text ASN.1),\n"
2483     "  9 = Seqalign (Binary ASN.1),\n"
2484     " 10 = Comma-separated values,\n"
2485     " 11 = BLAST archive (ASN.1),\n"
2486     " 12 = Seqalign (JSON),\n"
2487     " 13 = Multiple-file BLAST JSON,\n"
2488     " 14 = Multiple-file BLAST XML2,\n"
2489     " 15 = Single-file BLAST JSON,\n"
2490     " 16 = Single-file BLAST XML2");
2491 
2492     if(m_FormatFlags & eIsSAM) {
2493     	kOutputFormatDescription += ",\n 17 = Sequence Alignment/Map (SAM)";
2494     }
2495     kOutputFormatDescription += ",\n 18 = Organism Report\n\n";
2496     if(m_FormatFlags & eIsSAM) {
2497     	kOutputFormatDescription +=
2498                 "Options 6, 7, 10 and 17 "
2499     			"can be additionally configured to produce\n"
2500     		    "a custom format specified by space delimited format specifiers,\n"
2501                 "or in the case of options 6, 7, and 10, by a token specified\n"
2502                 "by the delim keyword. E.g.: \"17 delim=@ qacc sacc score\".\n"
2503                 "The delim keyword must appear after the numeric output format\n"
2504                 "specification.\n"
2505     		    "The supported format specifiers for options 6, 7 and 10 are:\n";
2506     }
2507     else {
2508     	kOutputFormatDescription +=
2509     			"Options 6, 7 and 10 "
2510     			"can be additionally configured to produce\n"
2511     		    "a custom format specified by space delimited format specifiers,\n"
2512                 "or by a token specified by the delim keyword.\n"
2513                 " E.g.: \"10 delim=@ qacc sacc score\".\n"
2514                 "The delim keyword must appear after the numeric output format\n"
2515                 "specification.\n"
2516     		    "The supported format specifiers are:\n";
2517     }
2518 
2519     kOutputFormatDescription += DescribeTabularOutputFormatSpecifiers() +  string("\n");
2520 
2521     if(m_FormatFlags & eIsSAM) {
2522     	kOutputFormatDescription +=
2523     			"The supported format specifier for option 17 is:\n" +
2524         		DescribeSAMOutputFormatSpecifiers();
2525     }
2526 
2527 
2528     int dft_outfmt = kDfltArgOutputFormat;
2529 
2530     // Igblast shows extra column of gaps
2531     if (m_IsIgBlast) {
2532         kOutputFormatDescription = string(
2533     "alignment view options:\n"
2534     "  3 = Flat query-anchored, show identities,\n"
2535     "  4 = Flat query-anchored, no identities,\n"
2536     "  7 = Tabular with comment lines\n"
2537     "  19 = Rearrangement summary report (AIRR format)\n\n"
2538     "Options 7 can be additionally configured to produce\n"
2539     "a custom format specified by space delimited format specifiers.\n"
2540     "The supported format specifiers are:\n") +
2541         DescribeTabularOutputFormatSpecifiers(true) +
2542         string("\n");
2543         dft_outfmt = 3;
2544     }
2545 
2546     // alignment view
2547     arg_desc.AddDefaultKey(kArgOutputFormat, "format",
2548                            kOutputFormatDescription,
2549                            CArgDescriptions::eString,
2550                            NStr::IntToString(dft_outfmt));
2551 
2552     // show GIs in deflines
2553     arg_desc.AddFlag(kArgShowGIs, "Show NCBI GIs in deflines?", true);
2554 
2555     // number of one-line descriptions to display
2556     arg_desc.AddOptionalKey(kArgNumDescriptions, "int_value",
2557                  "Number of database sequences to show one-line "
2558                  "descriptions for\n"
2559     		     "Not applicable for outfmt > 4\n"
2560     			 "Default = `"+ NStr::IntToString(m_DfltNumDescriptions)+ "'",
2561                  CArgDescriptions::eInteger);
2562     arg_desc.SetConstraint(kArgNumDescriptions,
2563                            new CArgAllowValuesGreaterThanOrEqual(0));
2564 
2565     // number of alignments per DB sequence
2566     arg_desc.AddOptionalKey(kArgNumAlignments, "int_value",
2567                  "Number of database sequences to show alignments for\n"
2568                  "Default = `" + NStr::IntToString(m_DfltNumAlignments) + "'",
2569                  CArgDescriptions::eInteger );
2570     arg_desc.SetConstraint(kArgNumAlignments,
2571                            new CArgAllowValuesGreaterThanOrEqual(0));
2572 
2573     arg_desc.AddOptionalKey(kArgLineLength, "line_length",
2574     		                "Line length for formatting alignments\n"
2575     						"Not applicable for outfmt > 4\n"
2576     		   			    "Default = `"+ NStr::NumericToString(align_format::kDfltLineLength) + "'",
2577     		                CArgDescriptions::eInteger);
2578     arg_desc.SetConstraint(kArgLineLength,
2579                            new CArgAllowValuesGreaterThanOrEqual(1));
2580 
2581     if(!m_IsIgBlast){
2582         // Produce HTML?
2583         arg_desc.AddFlag(kArgProduceHtml, "Produce HTML output?", true);
2584 
2585 
2586         arg_desc.AddOptionalKey(kArgSortHits, "sort_hits",
2587                            "Sorting option for hits:\n"
2588                            "alignment view options:\n"
2589                            "  0 = Sort by evalue,\n"
2590                            "  1 = Sort by bit score,\n"
2591                            "  2 = Sort by total score,\n"
2592                            "  3 = Sort by percent identity,\n"
2593                            "  4 = Sort by query coverage\n"
2594                            "Not applicable for outfmt > 4\n",
2595                            CArgDescriptions::eInteger);
2596         arg_desc.SetConstraint(kArgSortHits,
2597                            new CArgAllowValuesBetween(CAlignFormatUtil::eEvalue,
2598                                                       CAlignFormatUtil::eQueryCoverage,
2599                                                       true));
2600 
2601         arg_desc.AddOptionalKey(kArgSortHSPs, "sort_hsps",
2602                            "Sorting option for hps:\n"
2603                            "  0 = Sort by hsp evalue,\n"
2604                            "  1 = Sort by hsp score,\n"
2605                            "  2 = Sort by hsp query start,\n"
2606                            "  3 = Sort by hsp percent identity,\n"
2607                            "  4 = Sort by hsp subject start\n"
2608                            "Not applicable for outfmt != 0\n",
2609                            CArgDescriptions::eInteger);
2610         arg_desc.SetConstraint(kArgSortHSPs,
2611                            new CArgAllowValuesBetween(CAlignFormatUtil::eHspEvalue,
2612                                                       CAlignFormatUtil::eSubjectStart,
2613                                                       true));
2614         /// Hit list size, listed here for convenience only
2615         arg_desc.SetCurrentGroup("Restrict search or results");
2616         arg_desc.AddOptionalKey(kArgMaxTargetSequences, "num_sequences",
2617                             "Maximum number of aligned sequences to keep \n"
2618     						"(value of 5 or more is recommended)\n"
2619     						"Default = `" + NStr::IntToString(BLAST_HITLIST_SIZE) + "'",
2620                             CArgDescriptions::eInteger);
2621         arg_desc.SetConstraint(kArgMaxTargetSequences,
2622                            new CArgAllowValuesGreaterThanOrEqual(1));
2623         arg_desc.SetDependency(kArgMaxTargetSequences,
2624                            CArgDescriptions::eExcludes,
2625                            kArgNumDescriptions);
2626         arg_desc.SetDependency(kArgMaxTargetSequences,
2627                            CArgDescriptions::eExcludes,
2628                            kArgNumAlignments);
2629     }
2630     arg_desc.SetCurrentGroup("");
2631 }
2632 
2633 bool
ArchiveFormatRequested(const CArgs & args) const2634 CFormattingArgs::ArchiveFormatRequested(const CArgs& args) const
2635 {
2636     EOutputFormat output_fmt;
2637     string ignore1, ignore2;
2638     ParseFormattingString(args, output_fmt, ignore1, ignore2);
2639     return (output_fmt == eArchiveFormat ? true : false);
2640 }
2641 
2642 
s_ValidateCustomDelim(string custom_fmt_spec,string customDelim)2643 static void s_ValidateCustomDelim(string custom_fmt_spec,string customDelim)
2644 {
2645     bool error = false;
2646     string checkfield;
2647     custom_fmt_spec = NStr::TruncateSpaces(custom_fmt_spec);
2648     if(custom_fmt_spec.empty()) return;
2649 
2650     //Check if delim is already used
2651     const string kFieldsWithSemicolSeparator = "sallseqid staxids sscinames scomnames sblastnames sskingdoms";//sep = ";"
2652     const string kFramesField = "frames"; //sep = "/"
2653     const string kAllTitlesField ="salltitles"; //sep = "<>""
2654 
2655     if(customDelim == ";") {
2656         vector <string> tokens;
2657         NStr::Split(kFieldsWithSemicolSeparator," ", tokens);
2658         for(size_t i = 0; i < tokens.size(); i++)   {
2659             if(NStr::Find(custom_fmt_spec,tokens[i]) != NPOS) {
2660                 checkfield = tokens[i];
2661                 error = true;
2662                 break;
2663             }
2664         }
2665     }
2666     else {
2667         if(customDelim == "/") {
2668             checkfield = kFramesField;
2669         }
2670         else if(customDelim == "<>") {
2671             checkfield = kAllTitlesField;
2672         }
2673         if(!checkfield.empty() && NStr::Find(custom_fmt_spec,checkfield) != NPOS) {
2674             error = true;
2675         }
2676     }
2677 
2678     if(error) {
2679         string msg("Your custom record separator (" + customDelim + ") is also used by the format specifier (" + checkfield +
2680                                                                 ") to separate multiple entries. Please use a different record separator (delim keyword).");
2681         NCBI_THROW(CInputException, eInvalidInput, msg);
2682     }
2683 }
2684 
2685 void
ParseFormattingString(const CArgs & args,EOutputFormat & fmt_type,string & custom_fmt_spec,string & custom_delim) const2686 CFormattingArgs::ParseFormattingString(const CArgs& args,
2687                                        EOutputFormat& fmt_type,
2688                                        string& custom_fmt_spec,
2689                                        string& custom_delim) const
2690 {
2691     custom_fmt_spec.clear();
2692     if (args[kArgOutputFormat]) {
2693         string fmt_choice =
2694             NStr::TruncateSpaces(args[kArgOutputFormat].AsString());
2695         string::size_type pos;
2696         if ( (pos = fmt_choice.find_first_of(' ')) != string::npos) {
2697             custom_fmt_spec.assign(fmt_choice, pos+1,
2698                                    fmt_choice.size()-(pos+1));
2699             fmt_choice.erase(pos);
2700         }
2701         if(!custom_fmt_spec.empty()) {
2702             if(NStr::StartsWith(custom_fmt_spec, "delim")) {
2703                 vector <string> tokens;
2704                 NStr::Split(custom_fmt_spec," ",tokens);
2705                 if(tokens.size() > 0) {
2706                     string tag;
2707                     bool isValid = NStr::SplitInTwo(tokens[0],"=",tag,custom_delim);
2708                     if(!isValid) {
2709                         string msg("Delimiter format is invalid. Valid format is delim=<delimiter value>");
2710                         NCBI_THROW(CInputException, eInvalidInput, msg);
2711                     }
2712                     else {
2713                         custom_fmt_spec = NStr::Replace(custom_fmt_spec,tokens[0],"");
2714                     }
2715                 }
2716             }
2717         }
2718         int val = 0;
2719         try { val = NStr::StringToInt(fmt_choice); }
2720         catch (const CStringException&) {   // probably a conversion error
2721             CNcbiOstrstream os;
2722             os << "'" << fmt_choice << "' is not a valid output format";
2723             string msg = CNcbiOstrstreamToString(os);
2724             NCBI_THROW(CInputException, eInvalidInput, msg);
2725         }
2726         if (val < 0 || val >= static_cast<int>(eEndValue)) {
2727             string msg("Formatting choice is out of range");
2728             throw std::out_of_range(msg);
2729         }
2730         if (m_IsIgBlast && (val != 3 && val != 4 && val != 7 && val != eAirrRearrangement)) {
2731             string msg("Formatting choice is not valid");
2732             throw std::out_of_range(msg);
2733         }
2734         fmt_type = static_cast<EOutputFormat>(val);
2735         if ( !(fmt_type == eTabular ||
2736                fmt_type == eTabularWithComments ||
2737                fmt_type == eCommaSeparatedValues ||
2738                fmt_type == eSAM) ) {
2739                custom_fmt_spec.clear();
2740         }
2741     }
2742 }
2743 
2744 
2745 void
ExtractAlgorithmOptions(const CArgs & args,CBlastOptions & opt)2746 CFormattingArgs::ExtractAlgorithmOptions(const CArgs& args,
2747                                          CBlastOptions& opt)
2748 {
2749     ParseFormattingString(args, m_OutputFormat, m_CustomOutputFormatSpec,m_CustomDelim);
2750     if((m_OutputFormat == eSAM) && !(m_FormatFlags & eIsSAM) ){
2751     		NCBI_THROW(CInputException, eInvalidInput,
2752     		                        "SAM format is only applicable to blastn" );
2753     }
2754     if((m_OutputFormat == eAirrRearrangement) && !(m_FormatFlags & eIsAirrRearrangement) ){
2755         NCBI_THROW(CInputException, eInvalidInput,
2756                    "AIRR rearrangement format is only applicable to igblastn" );
2757     }
2758     if (m_OutputFormat == eFasta) {
2759         NCBI_THROW(CInputException, eInvalidInput,
2760                    "FASTA output format is only applicable to magicblast");
2761     }
2762     s_ValidateCustomDelim(m_CustomOutputFormatSpec,m_CustomDelim);
2763     m_ShowGis = static_cast<bool>(args[kArgShowGIs]);
2764     if(m_IsIgBlast){
2765         m_Html = false;
2766     } else {
2767         m_Html = static_cast<bool>(args[kArgProduceHtml]);
2768     }
2769     // Default hitlist size 500, value can be changed if import search strategy is used
2770     int hitlist_size = opt.GetHitlistSize();
2771 
2772     // To preserve hitlist size in import search strategy > 500,
2773     // we need to increase the  num_ descriptions and num_alignemtns
2774     if(hitlist_size > BLAST_HITLIST_SIZE )
2775     {
2776     	if((!args.Exist(kArgNumDescriptions) || !args[kArgNumDescriptions]) &&
2777            (!args.Exist(kArgNumAlignments) || !args[kArgNumAlignments]) &&
2778     	   (m_OutputFormat <= eFlatQueryAnchoredNoIdentities)) {
2779     		m_NumDescriptions = hitlist_size;
2780     		m_NumAlignments = hitlist_size/ 2;
2781     		return;
2782     	}
2783     }
2784 
2785     if(m_OutputFormat <= eFlatQueryAnchoredNoIdentities) {
2786 
2787 
2788     	 m_NumDescriptions = m_DfltNumDescriptions;
2789     	 m_NumAlignments = m_DfltNumAlignments;
2790 
2791     	 if (args.Exist(kArgNumDescriptions) && args[kArgNumDescriptions]) {
2792     	    m_NumDescriptions = args[kArgNumDescriptions].AsInteger();
2793     	 }
2794 
2795          if (args.Exist(kArgNumAlignments) && args[kArgNumAlignments]) {
2796     		m_NumAlignments = args[kArgNumAlignments].AsInteger();
2797     	}
2798 
2799          if (args.Exist(kArgMaxTargetSequences) && args[kArgMaxTargetSequences]) {
2800     	    m_NumDescriptions = args[kArgMaxTargetSequences].AsInteger();
2801     		m_NumAlignments = args[kArgMaxTargetSequences].AsInteger();
2802     		hitlist_size = m_NumAlignments;
2803          }
2804 
2805     	// The If clause is for handling import_search_strategy hitlist size < 500
2806     	// We want to preserve the hitlist size in iss if no formatting input is entered in cmdline
2807     	// If formmating option(s) is entered than the iss hitlist size is overridden.
2808          // FIXME: does this work with import search strategies?
2809          if ((args.Exist(kArgNumDescriptions) && args[kArgNumDescriptions]) ||
2810              (args.Exist(kArgNumAlignments) && args[kArgNumAlignments])) {
2811     		hitlist_size = max(m_NumDescriptions, m_NumAlignments);
2812     	}
2813 
2814     	if (args[kArgLineLength]) {
2815     	    m_LineLength = args[kArgLineLength].AsInteger();
2816     	}
2817         if(args.Exist(kArgSortHits) && args[kArgSortHits])
2818         {
2819        	    m_HitsSortOption = args[kArgSortHits].AsInteger();
2820         }
2821     }
2822     else
2823     {
2824     	if (args.Exist(kArgNumDescriptions) && args[kArgNumDescriptions]) {
2825    		 ERR_POST(Warning << "The parameter -num_descriptions is ignored for "
2826    				    		 "output formats > 4 . Use -max_target_seqs "
2827    				             "to control output");
2828     	}
2829 
2830     	if (args[kArgLineLength]) {
2831    		 ERR_POST(Warning << "The parameter -line_length is not applicable for "
2832    				    		 "output formats > 4 .");
2833     	}
2834 
2835     	if (args.Exist(kArgMaxTargetSequences) && args[kArgMaxTargetSequences]) {
2836     		hitlist_size = args[kArgMaxTargetSequences].AsInteger();
2837     	}
2838     	else if (args.Exist(kArgNumAlignments) && args[kArgNumAlignments]) {
2839     		hitlist_size = args[kArgNumAlignments].AsInteger();
2840     	}
2841 
2842     	m_NumDescriptions = hitlist_size;
2843     	m_NumAlignments = hitlist_size;
2844 
2845         if(args.Exist(kArgSortHits) && args[kArgSortHits]) {
2846             ERR_POST(Warning << "The parameter -sorthits is ignored for output formats > 4.");
2847         }
2848     }
2849 
2850     if(hitlist_size < 5){
2851    		ERR_POST(Warning << "Examining 5 or more matches is recommended");
2852     }
2853     opt.SetHitlistSize(hitlist_size);
2854 
2855     if(args.Exist(kArgSortHSPs) && args[kArgSortHSPs])
2856     {
2857         int hspsSortOption = args[kArgSortHSPs].AsInteger();
2858         if(m_OutputFormat == ePairwise) {
2859        	    m_HspsSortOption = hspsSortOption;
2860         }
2861         else {
2862             ERR_POST(Warning << "The parameter -sorthsps is ignored for output formats != 0.");
2863         }
2864     }
2865     return;
2866 }
2867 
2868 
2869 void
SetArgumentDescriptions(CArgDescriptions & arg_desc)2870 CMapperFormattingArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
2871 {
2872     arg_desc.SetCurrentGroup("Formatting options");
2873     string kOutputFormatDescription = string(
2874                                              "alignment view options:\n"
2875                                              "sam = SAM format,\n"
2876                                              "tabular = Tabular format,\n"
2877                                              "asn = text ASN.1\n");
2878 
2879     string kUnalignedOutputFormatDescription = string(
2880                                     "format for reporting unaligned reads:\n"
2881                                     "sam = SAM format,\n"
2882                                     "tabular = Tabular format,\n"
2883                                     "fasta = sequences in FASTA format\n"
2884                                     "Default = same as ") +
2885                                     align_format::kArgOutputFormat;
2886 
2887     arg_desc.AddDefaultKey(align_format::kArgOutputFormat, "format",
2888                            kOutputFormatDescription,
2889                            CArgDescriptions::eString,
2890                            "sam");
2891 
2892     set<string> allowed_formats = {"sam", "tabular", "asn"};
2893     arg_desc.SetConstraint(align_format::kArgOutputFormat,
2894                            new CArgAllowStringSet(allowed_formats));
2895 
2896     arg_desc.AddOptionalKey(kArgUnalignedFormat, "format",
2897                             kUnalignedOutputFormatDescription,
2898                             CArgDescriptions::eString);
2899 
2900     set<string> allowed_unaligned_formats = {"sam", "tabular", "fasta"};
2901     arg_desc.SetConstraint(kArgUnalignedFormat,
2902                            new CArgAllowStringSet(allowed_unaligned_formats));
2903 
2904     arg_desc.SetDependency(kArgUnalignedFormat, CArgDescriptions::eRequires,
2905                            kArgUnalignedOutput);
2906 
2907 
2908     arg_desc.AddFlag(kArgPrintMdTag, "Include MD tag in SAM report");
2909     arg_desc.AddFlag(kArgNoReadIdTrim, "Do not trim '.1', '/1', '.2', " \
2910                      "or '/2' at the end of read ids for SAM format and" \
2911                      "paired runs");
2912 
2913     arg_desc.AddFlag(kArgNoUnaligned, "Do not report unaligned reads");
2914 
2915     arg_desc.AddFlag(kArgNoDiscordant,
2916             "Suppress discordant alignments for paired reads");
2917 
2918     arg_desc.SetCurrentGroup("");
2919 }
2920 
ExtractAlgorithmOptions(const CArgs & args,CBlastOptions & opt)2921 void CMapperFormattingArgs::ExtractAlgorithmOptions(const CArgs& args,
2922                                                     CBlastOptions& opt)
2923 {
2924     if (args.Exist(align_format::kArgOutputFormat)) {
2925         string fmt_choice = args[align_format::kArgOutputFormat].AsString();
2926         if (fmt_choice == "sam") {
2927             m_OutputFormat = eSAM;
2928         }
2929         else if (fmt_choice == "tabular") {
2930             m_OutputFormat = eTabular;
2931         }
2932         else if (fmt_choice == "asn") {
2933             m_OutputFormat = eAsnText;
2934         }
2935         else {
2936             CNcbiOstrstream os;
2937             os << "'" << fmt_choice << "' is not a valid output format";
2938             string msg = CNcbiOstrstreamToString(os);
2939             NCBI_THROW(CInputException, eInvalidInput, msg);
2940         }
2941 
2942         m_UnalignedOutputFormat = m_OutputFormat;
2943     }
2944 
2945     if (args.Exist(kArgUnalignedFormat) && args[kArgUnalignedFormat]) {
2946         string fmt_choice = args[kArgUnalignedFormat].AsString();
2947         if (fmt_choice == "sam") {
2948             m_UnalignedOutputFormat = eSAM;
2949         }
2950         else if (fmt_choice == "tabular") {
2951             m_UnalignedOutputFormat = eTabular;
2952         }
2953         else if (fmt_choice == "fasta") {
2954             m_UnalignedOutputFormat = eFasta;
2955         }
2956         else {
2957             CNcbiOstrstream os;
2958             os << "'" << fmt_choice
2959                << "' is not a valid output format for unaligned reads";
2960             string msg = CNcbiOstrstreamToString(os);
2961             NCBI_THROW(CInputException, eInvalidInput, msg);
2962         }
2963     }
2964 
2965     m_ShowGis = true;
2966     m_Html = false;
2967 
2968     if (args.Exist(kArgNoReadIdTrim) && args[kArgNoReadIdTrim]) {
2969         m_TrimReadIds = false;
2970     }
2971 
2972     if (args.Exist(kArgNoUnaligned) && args[kArgNoUnaligned]) {
2973         m_PrintUnaligned = false;
2974     }
2975 
2976     if (args.Exist(kArgNoDiscordant) && args[kArgNoDiscordant]) {
2977         m_NoDiscordant = true;
2978     }
2979 
2980     if (args.Exist(kArgFwdRev) && args[kArgFwdRev]) {
2981         m_FwdRev = true;
2982     }
2983 
2984     if (args.Exist(kArgRevFwd) && args[kArgRevFwd]) {
2985         m_RevFwd = true;
2986     }
2987 
2988     if (args.Exist(kArgFwdOnly) && args[kArgFwdOnly]) {
2989         m_FwdOnly = true;
2990     }
2991 
2992     if (args.Exist(kArgRevOnly) && args[kArgRevOnly]) {
2993         m_RevOnly = true;
2994     }
2995 
2996     if (args.Exist(kArgOnlyStrandSpecific) && args[kArgOnlyStrandSpecific]) {
2997         m_OnlyStrandSpecific = true;
2998     }
2999 
3000     if (args.Exist(kArgPrintMdTag) && args[kArgPrintMdTag]) {
3001         m_PrintMdTag = true;
3002     }
3003 
3004    // only the fast tabular format is able to show merged HSPs with
3005     // common query bases
3006     if (m_OutputFormat != eTabular) {
3007         // FIXME: This is a hack. Merging should be done by the formatter,
3008         // but is currently done by HSP stream writer. This is an easy
3009         // switch until merging is implemented properly.
3010         CNcbiEnvironment().Set("MAPPER_NO_OVERLAPPED_HSP_MERGE", "1");
3011     }
3012 }
3013 
3014 void
SetArgumentDescriptions(CArgDescriptions & arg_desc)3015 CMTArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
3016 {
3017     // number of threads
3018     arg_desc.SetCurrentGroup("Miscellaneous options");
3019 #ifdef NCBI_THREADS
3020     const int kMinValue = static_cast<int>(CThreadable::kMinNumThreads);
3021     const int kMaxValue = static_cast<int>(CSystemInfo::GetCpuCount());
3022     const int kDfltValue = m_NumThreads != CThreadable::kMinNumThreads
3023         ? std::min<int>(m_NumThreads, kMaxValue) : kMinValue;
3024 
3025     arg_desc.AddDefaultKey(kArgNumThreads, "int_value",
3026                            "Number of threads (CPUs) to use in the BLAST search",
3027                            CArgDescriptions::eInteger,
3028                            NStr::IntToString(kDfltValue));
3029     arg_desc.SetConstraint(kArgNumThreads,
3030                            new CArgAllowValuesGreaterThanOrEqual(kMinValue));
3031     arg_desc.SetDependency(kArgNumThreads,
3032                            CArgDescriptions::eExcludes,
3033                            kArgRemote);
3034 
3035     if (m_MTMode >= 0) {
3036     	arg_desc.AddDefaultKey(kArgMTMode, "int_value",
3037                                "Multi-thread mode to use in BLAST search:\n "
3038                                "0 (auto) split by database \n "
3039                                "1 split by queries",
3040                                CArgDescriptions::eInteger,
3041                                NStr::IntToString(0));
3042     	arg_desc.SetConstraint(kArgMTMode,
3043                                new CArgAllowValuesBetween(0, 1, true));
3044     	arg_desc.SetDependency(kArgMTMode,
3045                                CArgDescriptions::eRequires,
3046                                kArgNumThreads);
3047     }
3048     /*
3049     arg_desc.SetDependency(kArgNumThreads,
3050                            CArgDescriptions::eExcludes,
3051                            kArgUseIndex);
3052     */
3053 #endif
3054     arg_desc.SetCurrentGroup("");
3055 }
3056 
CMTArgs(const CArgs & args)3057 CMTArgs::CMTArgs(const CArgs& args)
3058 {
3059 	x_ExtractAlgorithmOptions(args);
3060 }
3061 
3062 
3063 void
ExtractAlgorithmOptions(const CArgs & args,CBlastOptions &)3064 CMTArgs::ExtractAlgorithmOptions(const CArgs& args, CBlastOptions& /* opts */)
3065 {
3066 	x_ExtractAlgorithmOptions(args);
3067 }
3068 void
x_ExtractAlgorithmOptions(const CArgs & args)3069 CMTArgs::x_ExtractAlgorithmOptions(const CArgs& args)
3070 {
3071     const int kMaxValue = static_cast<int>(CSystemInfo::GetCpuCount());
3072 
3073     if (args.Exist(kArgNumThreads) &&
3074         args[kArgNumThreads].HasValue()) {  // could be cancelled by the exclusion in CRemoteArgs
3075 
3076         // use the minimum of the two: user requested number of threads and
3077         // number of available CPUs for number of threads
3078         int num_threads = args[kArgNumThreads].AsInteger();
3079         if (num_threads > kMaxValue) {
3080             m_NumThreads = kMaxValue;
3081 
3082             ERR_POST(Warning << (string)"Number of threads was reduced to " +
3083                      NStr::IntToString((unsigned int)m_NumThreads) +
3084                      " to match the number of available CPUs");
3085         }
3086         else {
3087             m_NumThreads = num_threads;
3088         }
3089 
3090         // This is temporarily ignored (per SB-635)
3091         if (args.Exist(kArgSubject) && args[kArgSubject].HasValue() &&
3092             m_NumThreads != CThreadable::kMinNumThreads) {
3093             m_NumThreads = CThreadable::kMinNumThreads;
3094             string opt = kArgNumThreads;
3095             if (args.Exist(kArgMTMode) &&
3096                (args[kArgMTMode].AsInteger() == CMTArgs::eSplitByQueries)) {
3097             	m_MTMode = CMTArgs::eSplitByDB;
3098             	opt += " and " + kArgMTMode;
3099             }
3100             ERR_POST(Warning << "'" << opt << "' is currently "
3101                      << "ignored when '" << kArgSubject << "' is specified.");
3102             return;
3103         }
3104     }
3105     if (args.Exist(kArgMTMode) && args[kArgMTMode].HasValue()) {
3106     	m_MTMode = (EMTMode) args[kArgMTMode].AsInteger();
3107     }
3108 
3109 }
3110 
3111 void
SetArgumentDescriptions(CArgDescriptions & arg_desc)3112 CRemoteArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
3113 {
3114     arg_desc.SetCurrentGroup("Miscellaneous options");
3115     arg_desc.AddFlag(kArgRemote, "Execute search remotely?", true);
3116 
3117     arg_desc.SetCurrentGroup("");
3118 }
3119 
3120 void
ExtractAlgorithmOptions(const CArgs & args,CBlastOptions &)3121 CRemoteArgs::ExtractAlgorithmOptions(const CArgs& args, CBlastOptions& /* opts */)
3122 {
3123     if (args.Exist(kArgRemote)) {
3124         m_IsRemote = static_cast<bool>(args[kArgRemote]);
3125     }
3126 }
3127 
3128 void
SetArgumentDescriptions(CArgDescriptions & arg_desc)3129 CDebugArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
3130 {
3131 #if _BLAST_DEBUG
3132     arg_desc.SetCurrentGroup("Miscellaneous options");
3133     arg_desc.AddFlag("verbose", "Produce verbose output (show BLAST options)",
3134                      true);
3135     arg_desc.AddFlag("remote_verbose",
3136                      "Produce verbose output for remote searches", true);
3137     arg_desc.AddFlag("use_test_remote_service",
3138                      "Send remote requests to test servers", true);
3139     arg_desc.SetCurrentGroup("");
3140 #endif /* _BLAST_DEBUG */
3141 }
3142 
3143 void
ExtractAlgorithmOptions(const CArgs & args,CBlastOptions &)3144 CDebugArgs::ExtractAlgorithmOptions(const CArgs& args, CBlastOptions& /* opts */)
3145 {
3146 #if _BLAST_DEBUG
3147     m_DebugOutput = static_cast<bool>(args["verbose"]);
3148     m_RmtDebugOutput = static_cast<bool>(args["remote_verbose"]);
3149     if (args["use_test_remote_service"]) {
3150         IRWRegistry& reg = CNcbiApplication::Instance()->GetConfig();
3151         reg.Set("BLAST4", DEF_CONN_REG_SECTION "_" REG_CONN_SERVICE_NAME,
3152                 "blast4_test");
3153     }
3154 #endif /* _BLAST_DEBUG */
3155 }
3156 
3157 void
SetArgumentDescriptions(CArgDescriptions & arg_desc)3158 CHspFilteringArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
3159 {
3160     // culling limit
3161     arg_desc.SetCurrentGroup("Restrict search or results");
3162     arg_desc.AddOptionalKey(kArgCullingLimit, "int_value",
3163                      "If the query range of a hit is enveloped by that of at "
3164                      "least this many higher-scoring hits, delete the hit",
3165                      CArgDescriptions::eInteger);
3166     arg_desc.SetConstraint(kArgCullingLimit,
3167     // best hit algorithm arguments
3168                new CArgAllowValuesGreaterThanOrEqual(kDfltArgCullingLimit));
3169 
3170     arg_desc.AddOptionalKey(kArgBestHitOverhang, "float_value",
3171                             "Best Hit algorithm overhang value "
3172                             "(recommended value: " +
3173                             NStr::DoubleToString(kDfltArgBestHitOverhang) +
3174                             ")",
3175                             CArgDescriptions::eDouble);
3176     arg_desc.SetConstraint(kArgBestHitOverhang,
3177                            new CArgAllowValuesBetween(kBestHit_OverhangMin,
3178                                                       kBestHit_OverhangMax));
3179     arg_desc.SetDependency(kArgBestHitOverhang,
3180                            CArgDescriptions::eExcludes,
3181                            kArgCullingLimit);
3182 
3183     arg_desc.AddOptionalKey(kArgBestHitScoreEdge, "float_value",
3184                             "Best Hit algorithm score edge value "
3185                             "(recommended value: " +
3186                             NStr::DoubleToString(kDfltArgBestHitScoreEdge) +
3187                             ")",
3188                             CArgDescriptions::eDouble);
3189     arg_desc.SetConstraint(kArgBestHitScoreEdge,
3190                            new CArgAllowValuesBetween(kBestHit_ScoreEdgeMin,
3191                                                       kBestHit_ScoreEdgeMax));
3192     arg_desc.SetDependency(kArgBestHitScoreEdge,
3193                            CArgDescriptions::eExcludes,
3194                            kArgCullingLimit);
3195     arg_desc.AddFlag(kArgSubjectBestHit, "Turn on best hit per subject sequence", true);
3196 
3197     arg_desc.SetCurrentGroup("");
3198 }
3199 
3200 void
ExtractAlgorithmOptions(const CArgs & args,CBlastOptions & opts)3201 CHspFilteringArgs::ExtractAlgorithmOptions(const CArgs& args,
3202                                       CBlastOptions& opts)
3203 {
3204     if (args[kArgCullingLimit]) {
3205         opts.SetCullingLimit(args[kArgCullingLimit].AsInteger());
3206     }
3207     if (args[kArgBestHitOverhang]) {
3208         opts.SetBestHitOverhang(args[kArgBestHitOverhang].AsDouble());
3209     }
3210     if (args[kArgBestHitScoreEdge]) {
3211         opts.SetBestHitScoreEdge(args[kArgBestHitScoreEdge].AsDouble());
3212     }
3213     if (args[kArgSubjectBestHit]) {
3214     	opts.SetSubjectBestHit();
3215     }
3216 }
3217 
3218 void
SetArgumentDescriptions(CArgDescriptions & arg_desc)3219 CMbIndexArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
3220 {
3221     arg_desc.SetCurrentGroup("General search options");
3222     arg_desc.AddDefaultKey(
3223             kArgUseIndex, "boolean",
3224             "Use MegaBLAST database index",
3225             CArgDescriptions::eBoolean, NStr::BoolToString(kDfltArgUseIndex));
3226     arg_desc.AddOptionalKey(
3227             kArgIndexName, "string",
3228             "MegaBLAST database index name (deprecated; use only for old style indices)",
3229             CArgDescriptions::eString );
3230     arg_desc.SetCurrentGroup( "" );
3231 }
3232 
3233 bool
HasBeenSet(const CArgs & args)3234 CMbIndexArgs::HasBeenSet(const CArgs& args)
3235 {
3236     if ( (args.Exist(kArgUseIndex) && args[kArgUseIndex].HasValue()) ||
3237          (args.Exist(kArgIndexName) && args[kArgIndexName].HasValue()) ) {
3238         return true;
3239     }
3240     return false;
3241 }
3242 
3243 void
ExtractAlgorithmOptions(const CArgs & args,CBlastOptions & opts)3244 CMbIndexArgs::ExtractAlgorithmOptions(const CArgs& args,
3245                                       CBlastOptions& opts)
3246 {
3247     // MB Index does not apply to Blast2Sequences
3248     if( args.Exist( kArgUseIndex ) &&
3249         !(args.Exist( kArgSubject ) && args[kArgSubject])) {
3250 
3251         bool use_index   = true;
3252         bool force_index = false;
3253         bool old_style_index = false;
3254 
3255         if( args[kArgUseIndex] ) {
3256             if( args[kArgUseIndex].AsBoolean() ) force_index = true;
3257             else use_index = false;
3258         }
3259 
3260         if( args.Exist( kTask ) && args[kTask] &&
3261                 args[kTask].AsString() != "megablast" ) {
3262             use_index = false;
3263         }
3264 
3265         if( use_index ) {
3266             string index_name;
3267 
3268             if( args.Exist( kArgIndexName ) && args[kArgIndexName] ) {
3269                 index_name = args[kArgIndexName].AsString();
3270                 old_style_index = true;
3271             }
3272             else if( args.Exist( kArgDb ) && args[kArgDb] ) {
3273                 index_name = args[kArgDb].AsString();
3274             }
3275             else {
3276                 NCBI_THROW(CInputException, eInvalidInput,
3277                         "Can not deduce database index name" );
3278             }
3279 
3280             opts.SetUseIndex( true, index_name, force_index, old_style_index );
3281         }
3282     }
3283 }
3284 
3285 void
SetArgumentDescriptions(CArgDescriptions & arg_desc)3286 CStdCmdLineArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
3287 {
3288     arg_desc.SetCurrentGroup("Input query options");
3289 
3290     // query filename
3291     arg_desc.AddDefaultKey(kArgQuery, "input_file",
3292                      "Input file name",
3293                      CArgDescriptions::eInputFile, kDfltArgQuery);
3294     // for now it's either -query or -sra
3295     if( m_SRAaccessionEnabled ) {
3296         arg_desc.AddOptionalKey(kArgSraAccession, "accession",
3297                             "Comma-separated SRA accessions",
3298                             CArgDescriptions::eString);
3299 	arg_desc.SetDependency(kArgSraAccession,
3300 			CArgDescriptions::eExcludes,
3301 			kArgQuery);
3302     }
3303 
3304     arg_desc.SetCurrentGroup("General search options");
3305 
3306     // report output file
3307     arg_desc.AddDefaultKey(kArgOutput, "output_file",
3308                    "Output file name",
3309                    CArgDescriptions::eOutputFile, "-");
3310 
3311     if (m_GzipEnabled) {
3312         arg_desc.AddFlag(kArgOutputGzip, "Output will be compressed");
3313     }
3314 
3315     arg_desc.SetCurrentGroup("");
3316 }
3317 
3318 void
ExtractAlgorithmOptions(const CArgs & args,CBlastOptions &)3319 CStdCmdLineArgs::ExtractAlgorithmOptions(const CArgs& args,
3320                                          CBlastOptions& /* opt */)
3321 {
3322     if (args.Exist(kArgQuery) && args[kArgQuery].HasValue() &&
3323         m_InputStream == NULL) {
3324 
3325         if (m_GzipEnabled &&
3326             NStr::EndsWith(args[kArgQuery].AsString(), ".gz", NStr::eNocase)) {
3327             m_DecompressIStream.reset(new CDecompressIStream(
3328                                                args[kArgQuery].AsInputFile(),
3329                                                CDecompressIStream::eGZipFile));
3330             m_InputStream = m_DecompressIStream.get();
3331         }
3332         else {
3333             m_InputStream = &args[kArgQuery].AsInputFile();
3334         }
3335     }
3336 
3337     if (args.Exist(kArgOutputGzip) && args[kArgOutputGzip]) {
3338         m_CompressOStream.reset(new CCompressOStream(
3339                                               args[kArgOutput].AsOutputFile(),
3340                                               CCompressOStream::eGZipFile));
3341         m_OutputStream = m_CompressOStream.get();
3342     }
3343     else {
3344         m_OutputStream = &args[kArgOutput].AsOutputFile();
3345     }
3346 
3347     // stream for unaligned reads in magicblast
3348     if (args.Exist(kArgUnalignedOutput) && args[kArgUnalignedOutput]) {
3349         if (args.Exist(kArgOutputGzip) && args[kArgOutputGzip]) {
3350             m_UnalignedCompressOStream.reset(new CCompressOStream(
3351                                    args[kArgUnalignedOutput].AsOutputFile(),
3352                                    CCompressOStream::eGZipFile));
3353             m_UnalignedOutputStream = m_UnalignedCompressOStream.get();
3354         }
3355         else {
3356             m_UnalignedOutputStream = &args[kArgUnalignedOutput].AsOutputFile();
3357         }
3358     }
3359 }
3360 
3361 CNcbiIstream&
GetInputStream() const3362 CStdCmdLineArgs::GetInputStream() const
3363 {
3364     // programmer must ensure the ExtractAlgorithmOptions method is called
3365     // before this method is invoked
3366     if ( !m_InputStream ) {
3367         abort();
3368     }
3369     return *m_InputStream;
3370 }
3371 
3372 CNcbiOstream&
GetOutputStream() const3373 CStdCmdLineArgs::GetOutputStream() const
3374 {
3375     // programmer must ensure the ExtractAlgorithmOptions method is called
3376     // before this method is invoked
3377     _ASSERT(m_OutputStream);
3378     return *m_OutputStream;
3379 }
3380 
3381 void
SetInputStream(CRef<CTmpFile> input_file)3382 CStdCmdLineArgs::SetInputStream(CRef<CTmpFile> input_file)
3383 {
3384     m_QueryTmpInputFile = input_file;
3385     m_InputStream = &input_file->AsInputFile(CTmpFile::eIfExists_Throw);
3386 }
3387 
3388 void
SetArgumentDescriptions(CArgDescriptions & arg_desc)3389 CSearchStrategyArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
3390 {
3391     arg_desc.SetCurrentGroup("Search strategy options");
3392 
3393     arg_desc.AddOptionalKey(kArgInputSearchStrategy,
3394                             "filename",
3395                             "Search strategy to use",
3396                             CArgDescriptions::eInputFile);
3397     arg_desc.AddOptionalKey(kArgOutputSearchStrategy,
3398                             "filename",
3399                             "File name to record the search strategy used",
3400                             CArgDescriptions::eOutputFile);
3401     arg_desc.SetDependency(kArgInputSearchStrategy,
3402                            CArgDescriptions::eExcludes,
3403                            kArgOutputSearchStrategy);
3404 
3405     arg_desc.SetCurrentGroup("");
3406 }
3407 
3408 void
ExtractAlgorithmOptions(const CArgs &,CBlastOptions &)3409 CSearchStrategyArgs::ExtractAlgorithmOptions(const CArgs& /* cmd_line_args */,
3410                                              CBlastOptions& /* options */)
3411 {
3412 }
3413 
3414 CNcbiIstream*
GetImportStream(const CArgs & args) const3415 CSearchStrategyArgs::GetImportStream(const CArgs& args) const
3416 {
3417     CNcbiIstream* retval = NULL;
3418     if (args.Exist(kArgInputSearchStrategy) &&
3419         args[kArgInputSearchStrategy].HasValue()) {
3420         retval = &args[kArgInputSearchStrategy].AsInputFile();
3421     }
3422     return retval;
3423 }
3424 
3425 CNcbiOstream*
GetExportStream(const CArgs & args) const3426 CSearchStrategyArgs::GetExportStream(const CArgs& args) const
3427 {
3428     CNcbiOstream* retval = NULL;
3429     if (args.Exist(kArgOutputSearchStrategy) &&
3430         args[kArgOutputSearchStrategy].HasValue()) {
3431         retval = &args[kArgOutputSearchStrategy].AsOutputFile();
3432     }
3433     return retval;
3434 }
3435 
CBlastAppArgs()3436 CBlastAppArgs::CBlastAppArgs()
3437 {
3438     m_SearchStrategyArgs.Reset(new CSearchStrategyArgs);
3439     m_Args.push_back(CRef<IBlastCmdLineArgs>(&*m_SearchStrategyArgs));
3440     m_IsUngapped = false;
3441 }
3442 
3443 CArgDescriptions*
SetCommandLine()3444 CBlastAppArgs::SetCommandLine()
3445 {
3446     return SetUpCommandLineArguments(m_Args);
3447 }
3448 
3449 CRef<CBlastOptionsHandle>
SetOptions(const CArgs & args)3450 CBlastAppArgs::SetOptions(const CArgs& args)
3451 {
3452     // We're recovering from a saved strategy or combining
3453     // CBlastOptions/CBlastOptionsHandle with command line options (in GBench,
3454     // see GB-1116), so we need to still extract
3455     // certain options from the command line, include overriding query
3456     // and/or database
3457     if (m_OptsHandle.NotEmpty()) {
3458         CBlastOptions& opts = m_OptsHandle->SetOptions();
3459         //opts.DebugDumpText(cerr, "OptionsBeforeLoop", 1);
3460         const bool mbidxargs_set = CMbIndexArgs::HasBeenSet(args);
3461         const bool dbargs_set = CBlastDatabaseArgs::HasBeenSet(args);
3462         NON_CONST_ITERATE(TBlastCmdLineArgs, arg, m_Args) {
3463             if (dynamic_cast<CMbIndexArgs*>(&**arg)) {
3464                 if (mbidxargs_set)
3465                     (*arg)->ExtractAlgorithmOptions(args, opts);
3466             } else if (dynamic_cast<CBlastDatabaseArgs*>(&**arg)) {
3467                 if (dbargs_set)
3468                     m_BlastDbArgs->ExtractAlgorithmOptions(args, opts);
3469             } else {
3470                 (*arg)->ExtractAlgorithmOptions(args, opts);
3471             }
3472         }
3473         m_IsUngapped = !opts.GetGappedMode();
3474         try { m_OptsHandle->Validate(); }
3475         catch (const CBlastException& e) {
3476             NCBI_THROW(CInputException, eInvalidInput, e.GetMsg());
3477         }
3478         //opts.DebugDumpText(cerr, "OptionsAfterLoop", 1);
3479         return m_OptsHandle;
3480     }
3481 
3482     CBlastOptions::EAPILocality locality =
3483         (args.Exist(kArgRemote) && args[kArgRemote])
3484         ? CBlastOptions::eRemote
3485         : CBlastOptions::eLocal;
3486 
3487     // This is needed as a CRemoteBlast object and its options are instantiated
3488     // to create the search strategy
3489     if (GetExportSearchStrategyStream(args) ||
3490            m_FormattingArgs->ArchiveFormatRequested(args)) {
3491         locality = CBlastOptions::eBoth;
3492     }
3493 
3494     CRef<CBlastOptionsHandle> retval(x_CreateOptionsHandle(locality, args));
3495     CBlastOptions& opts = retval->SetOptions();
3496     NON_CONST_ITERATE(TBlastCmdLineArgs, arg, m_Args) {
3497         (*arg)->ExtractAlgorithmOptions(args, opts);
3498     }
3499 
3500     m_IsUngapped = !opts.GetGappedMode();
3501     try { retval->Validate(); }
3502     catch (const CBlastException& e) {
3503         NCBI_THROW(CInputException, eInvalidInput, e.GetMsg());
3504     }
3505     return retval;
3506 }
3507 
SetTask(const string & task)3508 void CBlastAppArgs::SetTask(const string& task)
3509 {
3510 #if _BLAST_DEBUG
3511     ThrowIfInvalidTask(task);
3512 #endif
3513     m_Task.assign(task);
3514 }
3515 
3516 /// Get the input stream
GetInputStream()3517 CNcbiIstream& CBlastAppArgs::GetInputStream() {
3518     return m_StdCmdLineArgs->GetInputStream();
3519 }
3520 /// Get the output stream
GetOutputStream()3521 CNcbiOstream& CBlastAppArgs::GetOutputStream() {
3522     return m_StdCmdLineArgs->GetOutputStream();
3523 }
3524 
3525 CArgDescriptions*
SetUpCommandLineArguments(TBlastCmdLineArgs & args)3526 SetUpCommandLineArguments(TBlastCmdLineArgs& args)
3527 {
3528     unique_ptr<CArgDescriptions> retval(new CArgDescriptions);
3529 
3530     // Create the groups so that the ordering is established
3531     retval->SetCurrentGroup("Input query options");
3532     retval->SetCurrentGroup("General search options");
3533     retval->SetCurrentGroup("BLAST database options");
3534     retval->SetCurrentGroup("BLAST-2-Sequences options");
3535     retval->SetCurrentGroup("Formatting options");
3536     retval->SetCurrentGroup("Query filtering options");
3537     retval->SetCurrentGroup("Restrict search or results");
3538     retval->SetCurrentGroup("Discontiguous MegaBLAST options");
3539     retval->SetCurrentGroup("Statistical options");
3540     retval->SetCurrentGroup("Search strategy options");
3541     retval->SetCurrentGroup("Extension options");
3542     retval->SetCurrentGroup("");
3543 
3544 
3545     NON_CONST_ITERATE(TBlastCmdLineArgs, arg, args) {
3546         (*arg)->SetArgumentDescriptions(*retval);
3547     }
3548     return retval.release();
3549 }
3550 
3551 CRef<CBlastOptionsHandle>
x_CreateOptionsHandleWithTask(CBlastOptions::EAPILocality locality,const string & task)3552 CBlastAppArgs::x_CreateOptionsHandleWithTask
3553     (CBlastOptions::EAPILocality locality, const string& task)
3554 {
3555     _ASSERT(!task.empty());
3556     CRef<CBlastOptionsHandle> retval;
3557     SetTask(task);
3558     retval.Reset(CBlastOptionsFactory::CreateTask(GetTask(), locality));
3559     _ASSERT(retval.NotEmpty());
3560     return retval;
3561 }
3562 
3563 void
x_IssueWarningsForIgnoredOptions(const CArgs & args)3564 CBlastAppArgs::x_IssueWarningsForIgnoredOptions(const CArgs& args)
3565 {
3566     set<string> can_override;
3567     can_override.insert(kArgQuery);
3568     can_override.insert(kArgQueryLocation);
3569     can_override.insert(kArgSubject);
3570     can_override.insert(kArgSubjectLocation);
3571     can_override.insert(kArgUseLCaseMasking);
3572     can_override.insert(kArgDb);
3573     can_override.insert(kArgDbSize);
3574     can_override.insert(kArgEntrezQuery);
3575     can_override.insert(kArgDbSoftMask);
3576     can_override.insert(kArgDbHardMask);
3577     can_override.insert(kArgUseIndex);
3578     can_override.insert(kArgIndexName);
3579     can_override.insert(kArgStrand);
3580     can_override.insert(kArgParseDeflines);
3581     can_override.insert(kArgOutput);
3582     can_override.insert(kArgOutputFormat);
3583     can_override.insert(kArgNumDescriptions);
3584     can_override.insert(kArgNumAlignments);
3585     can_override.insert(kArgMaxTargetSequences);
3586     can_override.insert(kArgRemote);
3587     can_override.insert(kArgNumThreads);
3588     can_override.insert(kArgInputSearchStrategy);
3589     can_override.insert(kArgRemote);
3590     can_override.insert("remote_verbose");
3591     can_override.insert("verbose");
3592 
3593     // this stores the arguments (and their defaults) that cannot be overriden
3594     map<string, string> has_defaults;
3595     EBlastProgramType prog = m_OptsHandle->GetOptions().GetProgramType();
3596     has_defaults[kArgCompBasedStats] =
3597     		Blast_ProgramIsRpsBlast(prog) ? kDfltArgCompBasedStatsDelta:kDfltArgCompBasedStats;
3598     // FIX the line below for igblast, and add igblast options
3599     has_defaults[kArgEvalue] = NStr::DoubleToString(BLAST_EXPECT_VALUE);
3600     has_defaults[kTask] = m_Task;
3601     has_defaults[kArgOldStyleIndex] = kDfltArgOldStyleIndex;
3602 
3603     if (Blast_QueryIsProtein(prog)) {
3604         if (NStr::Find(m_Task, "blastp") != NPOS ||
3605             NStr::Find(m_Task, "psiblast") != NPOS) {
3606             has_defaults[kArgSegFiltering] = kDfltArgNoFiltering;
3607         } else {
3608             has_defaults[kArgSegFiltering] = kDfltArgSegFiltering;
3609         }
3610         has_defaults[kArgLookupTableMaskingOnly] =
3611             kDfltArgLookupTableMaskingOnlyProt;
3612         has_defaults[kArgGapTrigger] =
3613             NStr::IntToString((int)BLAST_GAP_TRIGGER_PROT);
3614     } else {
3615         has_defaults[kArgDustFiltering] = kDfltArgDustFiltering;
3616         has_defaults[kArgLookupTableMaskingOnly] =
3617             kDfltArgLookupTableMaskingOnlyNucl;
3618         has_defaults[kArgGapTrigger] =
3619             NStr::IntToString((int)BLAST_GAP_TRIGGER_NUCL);
3620     }
3621     has_defaults[kArgOffDiagonalRange] =
3622         NStr::IntToString(kDfltOffDiagonalRange);
3623     has_defaults[kArgMaskLevel] = kDfltArgMaskLevel;
3624     has_defaults[kArgMaxIntronLength] =
3625         NStr::IntToString(kDfltArgMaxIntronLength);
3626     has_defaults[kArgQueryGeneticCode] = NStr::IntToString((int)BLAST_GENETIC_CODE);
3627     has_defaults[kArgDbGeneticCode] = NStr::IntToString((int)BLAST_GENETIC_CODE);
3628     // pssm engine/psiblast default options
3629     has_defaults[kArgPSIPseudocount] =
3630         NStr::IntToString(PSI_PSEUDO_COUNT_CONST);
3631     has_defaults[kArgPSIInclusionEThreshold] =
3632         NStr::DoubleToString(PSI_INCLUSION_ETHRESH);
3633     has_defaults[kArgPSINumIterations] =
3634         NStr::IntToString(kDfltArgPSINumIterations);
3635 
3636     // get arguments, remove the supported ones and warn about those that
3637     // cannot be overridden.
3638     typedef vector< CRef<CArgValue> > TArgs;
3639     TArgs arguments = args.GetAll();
3640     ITERATE(TArgs, a, arguments) {
3641         const string& arg_name = (*a)->GetName();
3642         const string& arg_value = (*a)->AsString();
3643         // if it has a default value, ignore it if it's not different from the
3644         // default, otherwise, issue a warning
3645         if (has_defaults.find(arg_name) != has_defaults.end()) {
3646             if (has_defaults[arg_name] == arg_value) {
3647                 continue;
3648             } else {
3649                 if (arg_name == kTask && arg_value == "megablast") {
3650                     // No need to issue warning here, as it's OK to change this
3651                     continue;
3652                 }
3653                 ERR_POST(Warning << arg_name << " cannot be overridden when "
3654                          "using a search strategy");
3655             }
3656         }
3657         // if the argument cannot be overridden, issue a warning
3658         if (can_override.find(arg_name) == can_override.end()) {
3659             ERR_POST(Warning << arg_name << " cannot be overridden when "
3660                      "using a search strategy");
3661         }
3662     }
3663 }
3664 
3665 CRef<CBlastOptionsHandle>
SetOptionsForSavedStrategy(const CArgs & args)3666 CBlastAppArgs::SetOptionsForSavedStrategy(const CArgs& args)
3667 {
3668 	if(m_OptsHandle.Empty())
3669 	{
3670         NCBI_THROW(CInputException, eInvalidInput, "Empty Blast Options Handle");
3671 	}
3672 
3673     // We're recovering from a saved strategy, so we need to still extract
3674     // certain options from the command line, include overriding query
3675     // and/or database
3676     CBlastOptions& opts = m_OptsHandle->SetOptions();
3677     // invoke ExtractAlgorithmOptions on certain argument classes, i.e.: those
3678     // that should have their arguments overriden
3679     m_QueryOptsArgs->ExtractAlgorithmOptions(args, opts);
3680     m_StdCmdLineArgs->ExtractAlgorithmOptions(args, opts);
3681     m_RemoteArgs->ExtractAlgorithmOptions(args, opts);
3682     m_DebugArgs->ExtractAlgorithmOptions(args, opts);
3683     m_FormattingArgs->ExtractAlgorithmOptions(args, opts);
3684     m_MTArgs->ExtractAlgorithmOptions(args, opts);
3685     if (CBlastDatabaseArgs::HasBeenSet(args)) {
3686           m_BlastDbArgs->ExtractAlgorithmOptions(args, opts);
3687     }
3688     if (CMbIndexArgs::HasBeenSet(args)) {
3689         NON_CONST_ITERATE(TBlastCmdLineArgs, arg, m_Args) {
3690             if (dynamic_cast<CMbIndexArgs*>(arg->GetPointer()) != NULL) {
3691                 (*arg)->ExtractAlgorithmOptions(args, opts);
3692             }
3693         }
3694     }
3695     m_IsUngapped = !opts.GetGappedMode();
3696     x_IssueWarningsForIgnoredOptions(args);
3697     try { m_OptsHandle->Validate(); }
3698     catch (const CBlastException& e) {
3699         NCBI_THROW(CInputException, eInvalidInput, e.GetMsg());
3700     }
3701     return m_OptsHandle;
3702 }
3703 
3704 END_SCOPE(blast)
3705 END_NCBI_SCOPE
3706