1 #ifndef ALGO_COBALT___COBALT_OPTIONS__HPP
2 #define ALGO_COBALT___COBALT_OPTIONS__HPP
3 
4 /* $Id: options.hpp 463771 2015-04-01 15:14:35Z boratyng $
5 * ===========================================================================
6 *
7 *                            PUBLIC DOMAIN NOTICE
8 *               National Center for Biotechnology Information
9 *
10 *  This software/database is a "United States Government Work" under the
11 *  terms of the United States Copyright Act.  It was written as part of
12 *  the author's offical duties as a United States Government employee and
13 *  thus cannot be copyrighted.  This software/database is freely available
14 *  to the public for use. The National Library of Medicine and the U.S.
15 *  Government have not placed any restriction on its use or reproduction.
16 *
17 *  Although all reasonable efforts have been taken to ensure the accuracy
18 *  and reliability of the software and data, the NLM and the U.S.
19 *  Government do not and cannot warrant the performance or results that
20 *  may be obtained by using this software or data. The NLM and the U.S.
21 *  Government disclaim all warranties, express or implied, including
22 *  warranties of performance, merchantability or fitness for any particular
23 *  purpose.
24 *
25 *  Please cite the author in any work or product based on this material.
26 *
27 * ===========================================================================*/
28 
29 /*****************************************************************************
30 
31 File name: options.hpp
32 
33 Author: Greg Boratyn
34 
35 Contents: Interface for CMultiAlignerOptions
36 
37 ******************************************************************************/
38 
39 
40 /// @file options.hpp
41 /// Options for CMultiAligner
42 
43 #include <corelib/ncbiobj.hpp>
44 #include <algo/cobalt/kmercounts.hpp>
45 #include <algo/align/nw/nw_pssm_aligner.hpp>
46 #include <objects/blast/Blast4_archive.hpp>
47 
48 /// Default values for cobalt parameters
/// Rps-Blast e-value cutoff for creating constraints
50 #define COBALT_RPS_EVALUE 0.003
/// Weight for domain residue frequencies when creating MSA profiles
52 #define COBALT_DOMAIN_BOOST 0.5
53 /// Hitlist size for Rps-Blast searches
54 #define COBALT_DOMAIN_HITLIST_SIZE 500
55 
/// Blastp e-value cutoff for creating constraints
57 #define COBALT_BLAST_EVALUE 0.005
/// Weight for sequence residues when creating MSA profiles
59 #define COBALT_LOCAL_BOOST 1.0
60 
61 /// Pseudocount constant used in multiple alignment
62 #define COBALT_PSEUDO_COUNT 2.0
63 /// Conservation score cutoff used for selecting conserved columns in
64 /// initial MSA
65 #define COBALT_CONSERVED_CUTOFF 0.67
66 
67 /// Default method for computing progressive alignment tree
68 #define COBALT_TREE_METHOD CMultiAlignerOptions::eClusters
69 
70 /// Default substitution matrix used in multiple alignment
71 #define COBALT_DEFAULT_MATRIX "BLOSUM62"
72 /// End gap opening score
73 #define COBALT_END_GAP_OPEN -5
74 /// End gap extension score
75 #define COBALT_END_GAP_EXTNT -1
76 /// Gap opening score
77 #define COBALT_GAP_OPEN -11
78 /// Gap extension score
79 #define COBALT_GAP_EXTNT -1
80 
81 /// Maximum cluster diameter for pre-alignment sequence clustering
82 #define COBALT_MAX_CLUSTER_DIAM 0.8
83 /// K-mer length for sequence clustering
84 #define COBALT_KMER_LEN 4
85 /// K-mer alphabet for sequence clustering
86 #define COBALT_KMER_ALPH CMultiAlignerOptions::TKMethods::eSE_B15
87 
88 BEGIN_NCBI_SCOPE
89 BEGIN_SCOPE(cobalt)
90 
91 
/// Options and parameters for multiple alignment
93 ///
94 class NCBI_COBALT_EXPORT CMultiAlignerOptions : public CObject
95 {
96 
97 public:
98     typedef CNWAligner::TScore TScore;
99     typedef TKmerMethods<CSparseKmerCounts> TKMethods;
100 
101     /// Representation of CDD pattern
102     ///
103     /// Pattern is represented either as string or pointer in order
104     /// to avoid copying large blocks if patterns are already in memory.
105     /// Representation is selected by the use of constructor. String
106     /// constructor creates a copy of the argument, pointer one does not.
107     class CPattern
108     {
109     public:
110 
111         /// Create empty pattern
CPattern(void)112         CPattern(void)
113             : m_Pattern((char*)NULL), m_IsPointer(true) {}
114 
115         /// Create pattern as pointer. Referenced memory is not copied.
116         /// @param pattern Pattern
CPattern(char * pattern)117         CPattern(char* pattern)
118             : m_Pattern(pattern), m_IsPointer(true) {}
119 
120         /// Create pattern as string. The argument is copied.
121         /// @param pattern Pattern
CPattern(const string & pattern)122         CPattern(const string& pattern)
123             : m_Pattern(pattern), m_IsPointer(false) {}
124 
125         /// Create copy of a pattern
126         /// @param pattern Pattern
CPattern(const CPattern & pattern)127         CPattern(const CPattern& pattern)
128         {
129             if (pattern.m_IsPointer) {
130                 m_Pattern.pointer = pattern.m_Pattern.pointer;
131             }
132             else {
133                 m_Pattern.str = pattern.m_Pattern.str;
134             }
135             m_IsPointer = pattern.m_IsPointer;
136         }
137 
138         /// Assignment operator
139         /// @param pattern Pattern
operator =(const CPattern & pattern)140         CPattern& operator=(const CPattern& pattern)
141         {
142             if (pattern.m_IsPointer) {
143                 m_Pattern.pointer = pattern.m_Pattern.pointer;
144             }
145             else {
146                 m_Pattern.str = pattern.m_Pattern.str;
147             }
148             m_IsPointer = pattern.m_IsPointer;
149 
150             return *this;
151         }
152 
153         /// Get pattern as pointer
154         /// @return Pointer to a pattern
AsPointer(void) const155         const char* AsPointer(void) const
156         {return (m_IsPointer ? m_Pattern.pointer
157                  : m_Pattern.str.c_str());}
158 
159         /// Get a copy of a pattern as string
160         /// @return Copy of pattern
AsString(void) const161         string AsString(void) const
162         {return (m_IsPointer ? (string)m_Pattern.pointer : m_Pattern.str);}
163 
164         /// Check if pattern is stored as pointer
165         /// @return
166         ///     True if pattern is stored as pointer,
167         ///     False otherwise
IsPointer(void) const168         bool IsPointer(void) const {return m_IsPointer;}
169 
170         /// Check if pattern is empty
171         /// @return
172         ///    True if patter is empty,
173         ///    False otherwise
IsEmpty(void) const174         bool IsEmpty(void) const
175         {return m_IsPointer ? !m_Pattern.pointer : m_Pattern.str.empty();}
176 
177     private:
178         struct SPattern {
179             char* pointer;
180             string str;
181 
SPatternCMultiAlignerOptions::CPattern::SPattern182             SPattern(void) : pointer(NULL) {}
SPatternCMultiAlignerOptions::CPattern::SPattern183             SPattern(char* ptr) : pointer(ptr) {}
SPatternCMultiAlignerOptions::CPattern::SPattern184             SPattern(string s) : pointer(NULL), str(s) {}
185         };
186 
187         SPattern m_Pattern;
188         bool m_IsPointer;
189     };
190 
191 
192     /// Structure for representing single user constraint for pair-wise
193     /// alignment
194     struct SConstraint {
195         int seq1_index;
196         int seq1_start;
197         int seq1_stop;
198 
199         int seq2_index;
200         int seq2_start;
201         int seq2_stop;
202 
203         /// Create empty constraint
204         ///
SConstraintCMultiAlignerOptions::SConstraint205         SConstraint(void) : seq1_index(-1), seq2_index(-1) {}
206 
207         /// Create constraint for given sequences and locations
208         /// @param ind1 Index of sequence 1 in query array
209         /// @param start1 Start location for sequence 1
210         /// @param end1 End location for sequence 1
211         /// @param ind2 Index of sequence 2 in query array
212         /// @param start2 Start location for sequence 2
213         /// @param end2 End location for sequence 2
SConstraintCMultiAlignerOptions::SConstraint214         SConstraint(int ind1, int start1, int end1, int ind2, int start2,
215                         int end2)
216             : seq1_index(ind1), seq1_start(start1), seq1_stop(end1),
217               seq2_index(ind2), seq2_start(start2), seq2_stop(end2)
218         {}
219 
220     };
221 
222     typedef vector<SConstraint> TConstraints;
223 
224     /// Mode of multi aligner setings. Values can be combined.
225     enum EMode {
226 
227         // Qyery clusters
228         fNoQueryClusters = 1, ///< No query clustering
229 
230         // RPS Blast search
231         fNoRpsBlast = 1<<2,        ///< Do not use RPS Blast
232 
233         // Regular expression patterns search
234         fNoPatterns = 1<<3,        ///< Do not use conserved domain patterns
235 
236         // Iterative alignment
237         fNoIterate = 1<<4,          ///< Do not use Iterative alignment
238 
239         fNoRealign = 1<<5,    ///< Do not realign with different tree root
240 
241         fFastAlign = 1<<6,    ///< Do Fast and rough profile-profile alignment
242 
243         /// Set options for very fast alignment (speed over accuracy)
244         fFast = fNoRpsBlast | fNoIterate | fNoRealign | fFastAlign,
245 
246         // Other
247         fNonStandard = 1<<7   ///< Not used as input, indicates that
248                                ///< non-standard settings were selected after
249 
250     };
251 
252     typedef int TMode;
253 
254     /// Default options mode
255     static const TMode kDefaultMode = 0;
256 
257     /// Method for construction of guide tree for progressive alignment
258     enum ETreeMethod {
259         eNJ = 0,  ///< Neighbot Joining
260         eFastME,  ///< Fast Minimum Evolution
261         eClusters ///< Clustering dendrogram
262     };
263 
264     enum EInClustAlnMethod {
265         eNone = 0,     ///< No clustering
266         eToPrototype,  ///< All cluster elements are aligner to cluster
267                        ///< prototype
268 
269         eMulti         ///< Alignment guide tree for each cluster is attached
270                        ///< to the main alignment guide tree
271     };
272 
273 public:
274 
275     /// Create options with default mode
276     ///
277     CMultiAlignerOptions(void);
278 
279     /// Create options with desired mode
280     /// @param mode Desired mode of operation
281     ///
282     explicit CMultiAlignerOptions(TMode mode);
283 
284     /// Create options with RPS database and desired mode
285     /// @param rps_db_name Name of RPS database
286     /// @param mode Mode of operation
287     ///
288     CMultiAlignerOptions(const string& rps_db_name, TMode mode = kDefaultMode);
289 
290 
    // Turn on and off major options and set major parameters. Other parameters
    // are set to defaults.
293 
294     /// Set use of query clustering option
295     ///
296     /// If the option in set on, query sequences will be clustered. Each cluster
297     /// Will be aligned independently without searching for conserved domains
298     /// (RPS Blast) and local hist (Blastp). Multiple alignment will be
299     /// performed on cluster profiles. Parameters of clustering procedure are
300     /// set base on EMode value and can be chanded with expert functions.
301     /// @param use Option used if true [in]
302     ///
SetUseQueryClusters(bool use)303     void SetUseQueryClusters(bool use)
304     {m_UseQueryClusters = use; m_Mode = fNonStandard;}
305 
306 
307     /// Check if query clustering option is on
308     /// @return
309     ///   - True if query clustering option is on,
310     ///   - False otherwise
311     ///
GetUseQueryClusters(void) const312     bool GetUseQueryClusters(void) const {return m_UseQueryClusters;}
313 
314 
315     /// Set use of iterative alignment option
316     ///
317     /// After initial multiple alignment is done, conserved columns will be
318     /// found and sequences will be re-aligned using this information. Default
319     /// parameters will be used. Iterative alignment parameters can be changed
320     /// with expert functions.
321     /// @param use Option used if true, otherwise not used [in]
322     ///
SetIterate(bool use)323     void SetIterate(bool use)
324     {m_Iterate = use; m_Mode = fNonStandard;}
325 
326     /// Set realigning MSA with different root nodes in the progressive
327     /// alignment tree
328     /// @param r Do realignment if true [in]
329     ///
SetRealign(bool r)330     void SetRealign(bool r)
331     {m_Realign = r; m_Mode = fNonStandard;}
332 
333     /// Check if iterative alignmnet option is used
334     /// @return
335     ///   - True if iterative alignment option is on,
336     ///   - False otherwise
337     ///
GetIterate(void) const338     bool GetIterate(void) const {return m_Iterate;}
339 
340     /// Check if MSA is to be realigned for different rooting of progressive
341     /// alignment tree
342     /// @return True if MSA is to be realigned, false otherwise
343     ///
GetRealign(void) const344     bool GetRealign(void) const {return m_Realign;}
345 
346 
347     /// Use RPS Blast with given database
348     ///
349     /// RPS Blast will be used to find conserved domains in query sequences.
350     /// Pairwise alignments are constrainted so that matching conserved domains
351     /// are aligned.
352     /// Default RPS Blast parameters will be used. RPS Blast parameters can be
353     /// changed with expetr functions.
354     /// @param dbname Path and name of RPS Blast data base [in]
355     ///
SetRpsDb(const string & dbname)356     void SetRpsDb(const string& dbname)
357     {m_RpsDb = dbname; m_Mode = m_Mode & ~fNoRpsBlast;}
358 
359 
360     /// Get RPS Blast data base name
361     /// @return RPS Blast data base name
362     ///
GetRpsDb(void) const363     string GetRpsDb(void) const {return m_RpsDb;}
364 
365 
366     /// Determine if RPS Blast is to be used
367     /// @return True if RPS Blast will be used, false otherwise
368     ///
GetUseRpsBlast(void) const369     bool GetUseRpsBlast(void) const {return !m_RpsDb.empty();}
370 
371 
372     /// Set regular expression patterns for identification of conserved domains.
373     /// Patterns are not freed when object is deleted.
374     ///
375     /// Regular expresion patterns will be used to find conserved domains.
376     /// Pairwise alignmnents will be constained to so that matching conserved
377     /// domains are aligned. Parameter ownership is transfered to options.
378     /// @return Reference to the list of conserved domain patterns
379     ///
SetCddPatterns(void)380     vector<CPattern>& SetCddPatterns(void)
381     {m_Mode = fNonStandard; return m_Patterns;}
382 
383 
384     /// Set default patterns for identification of conserved domains.
385     ///
    /// Regular expression patterns will be used to find conserved domains.
    /// Pairwise alignments will be constrained so that matching conserved
    /// domains are aligned. Parameter ownership is transferred to options.
389     ///
390     void SetDefaultCddPatterns(void);
391 
392 
393     /// Get regular expression patterns for identification of conserved domains
394     /// @return List of conserved domain patterns
395     ///
GetCddPatterns(void) const396     const vector<CPattern>& GetCddPatterns(void) const {return m_Patterns;}
397 
398 
399     /// Set user constraints.
400     ///
401     /// The constraits are used in progressive alignment.
402     /// @return Reference to the list of user constraints
403     ///
SetUserConstraints(void)404     TConstraints& SetUserConstraints(void)
405     {m_Mode = fNonStandard; return m_UserHits;}
406 
407 
408     /// Get user constraints
409     /// @return User constraits
410     ///
GetUserConstraints(void) const411     const TConstraints& GetUserConstraints(void) const
412     {return m_UserHits;}
413 
414 
415     /// Set score for user alignment constraints
416     /// @param score Score
417     ///
SetUserConstraintsScore(int score)418     void SetUserConstraintsScore(int score)
419     {m_UserHitsScore = score; m_Mode = fNonStandard;}
420 
421 
422     /// Get score for user alignment constraints
423     /// @return Score for user alignmnet constraints
424     ///
GetUserConstraintsScore(void) const425     int GetUserConstraintsScore(void) const {return m_UserHitsScore;}
426 
427 
428     //--- Expert Methods ---
429 
430     //--- Query clustering ---
431 
432     /// Set word size for creating word count vectors in query clustering
433     /// @param len Number of letters in a word [in]
434     ///
SetKmerLength(int len)435     void SetKmerLength(int len) {m_KmerLength = len; m_Mode = fNonStandard;}
436 
437     /// Get word size for creating word count vectors
438     /// @return Number of letters in a word
439     ///
GetKmerLength(void) const440     int GetKmerLength(void) const {return m_KmerLength;}
441 
442     /// Set alphabet for creating word count vectors
443     /// @param alph Alphabet [in]
444     ///
SetKmerAlphabet(TKMethods::ECompressedAlphabet alph)445     void SetKmerAlphabet(TKMethods::ECompressedAlphabet alph)
446     {m_KmerAlphabet = alph; m_Mode = fNonStandard;}
447 
448     /// Get alphabet used for creating word count vectors
449     /// @return Alphabet
450     ///
GetKmerAlphabet(void) const451     TKMethods::ECompressedAlphabet GetKmerAlphabet(void) const
452     {return m_KmerAlphabet;}
453 
454     /// Set measure for computing distance between word count vectors
455     /// @param method Distance method [in]
456     ///
SetKmerDistMeasure(TKMethods::EDistMeasures method)457     void SetKmerDistMeasure(TKMethods::EDistMeasures method)
458     {m_ClustDistMeasure = method; m_Mode = fNonStandard;}
459 
460 
461     /// Get method for computing distance between word count vectors
462     /// @return Distance method
463     ///
GetKmerDistMeasure(void) const464     TKMethods::EDistMeasures GetKmerDistMeasure(void) const
465     {return m_ClustDistMeasure;}
466 
467 
468     /// Set maximum allowed distance between sequences in a cluster
469     /// @param dist Maximum allowed distance in cluster [in]
470     ///
SetMaxInClusterDist(double dist)471     void SetMaxInClusterDist(double dist)
472     {m_MaxInClusterDist = dist; m_Mode = fNonStandard;}
473 
474     /// Get maximum allowed distance between sequences in a cluster
475     /// @return Maxium allowed distance in cluster
476     ///
GetMaxInClusterDist(void) const477     double GetMaxInClusterDist(void) const {return m_MaxInClusterDist;}
478 
479 
480     //--- RPS Blast ---
481 
482     /// Set e-value threshold for accepting RPS Blast hits
483     /// @param evalue E-value for acceting RPS Blast hits [in]
484     ///
SetRpsEvalue(double evalue)485     void SetRpsEvalue(double evalue)
486     {m_RpsEvalue = evalue; m_Mode = fNonStandard;}
487 
488     /// Get e-value threshold for accepting RPS Blast hits
489     /// @return E-value for accepting RPS Blast hits
490     ///
GetRpsEvalue(void) const491     double GetRpsEvalue(void) const {return m_RpsEvalue;}
492 
493     /// Set hitlist size (per sequence) for domain search
494     /// @param size Hitlist size [in]
495     ///
SetDomainHitlistSize(int size)496     void SetDomainHitlistSize(int size)
497     {m_DomainHitlistSize = size; m_Mode = fNonStandard;}
498 
499     /// Get hitlist size (per sequence) for domain searches
500     /// @return Hitlist size for domain searches
501     ///
GetDomainHitlistSize(void) const502     int GetDomainHitlistSize(void) const {return m_DomainHitlistSize;}
503 
504     /// Set boost for residue frequencies in conserved domains from RPS data
505     /// base
506     /// @param boost Boost for RPS residue frequencies [in]
507     ///
SetDomainResFreqBoost(double boost)508     void SetDomainResFreqBoost(double boost)
509     {m_DomainResFreqBoost = boost; m_Mode = fNonStandard;}
510 
511     /// Get boost for residue frequencies in conserved domains from RPS data
512     /// base
513     /// @return Boost for RPS residue frequencies
514     ///
GetDomainResFreqBoost(void) const515     double GetDomainResFreqBoost(void) const {return m_DomainResFreqBoost;}
516 
517     /// Set use of precomputed RPS Blast hits
518     /// @param use
SetUsePreRpsHits(bool use)519     void SetUsePreRpsHits(bool use)
520     {m_UsePreRpsHits = use; m_Mode = fNonStandard;}
521 
522     /// Get use of precomputed RPS Blast hits
523     /// @return
524     ///     - True if use precomputed RPS hits is on,
525     ///     - False otherwise
526     ///
GetUsePreRpsHits(void) const527     bool GetUsePreRpsHits(void) const {return m_UsePreRpsHits;}
528 
529 
530     //--- Blastp ---
531 
532     /// Set e-value for accepting Blastp hits
533     /// @param evalue E-value for accepting Blastp hits [in]
534     ///
SetBlastpEvalue(double evalue)535     void SetBlastpEvalue(double evalue)
536     {m_BlastpEvalue = evalue; m_Mode = fNonStandard;}
537 
538     /// Get e-value for accepting Blastp hits
539     /// @return E-value for accepting Blastp hits
540     ///
GetBlastpEvalue(void) const541     double GetBlastpEvalue(void) const {return m_BlastpEvalue;}
542 
543 
544     //--- Iterative alignment
545 
546     /// Set cutoff score for conserved aligned columns
547     /// @param score Cutoff score [in]
548     ///
SetConservedCutoffScore(double score)549     void SetConservedCutoffScore(double score)
550     {m_ConservedCutoff = score; m_Mode = fNonStandard;}
551 
552     /// Get cutoff score for conserved aligned columns
553     /// @return Cutoff score
554     ///
GetConservedCutoffScore(void) const555     double GetConservedCutoffScore(void) const {return m_ConservedCutoff;}
556 
557     /// Set pseudocount for calculating column entropy
558     /// @param pseudocount Pseudocount [in]
559     ///
SetPseudocount(double pseudocount)560     void SetPseudocount(double pseudocount)
561     {m_Pseudocount = pseudocount; m_Mode = fNonStandard;}
562 
563     /// Get pseudocount for calculating column entropy
564     /// @return Pseudocount
565     ///
GetPseudocount(void) const566     double GetPseudocount(void) const {return m_Pseudocount;}
567 
568 
569     //--- Progressive alignment
570 
571     /// Set method for creating tree that guides progressive alignment
572     /// @param method Tree computation method [in]
573     ///
SetTreeMethod(ETreeMethod method)574     void SetTreeMethod(ETreeMethod method)
575     {m_TreeMethod = method; m_Mode = fNonStandard;}
576 
577     /// Get method for creating tree that guides progressive alignment
578     /// @return Tree method
579     ///
GetTreeMethod(void) const580     ETreeMethod GetTreeMethod(void) const {return m_TreeMethod;}
581 
582     /// Set frequency boost for a letter that appears in query sequence in
583     /// given position
584     /// @param boost Frequency boost [in]
585     ///
SetLocalResFreqBoost(double boost)586     void SetLocalResFreqBoost(double boost)
587     {m_LocalResFreqBoost = boost; m_Mode =fNonStandard;}
588 
589     /// Get frequency boost for a letter that appears in query sequence in
590     /// given position
591     /// @return Frequency boost
592     ///
GetLocalResFreqBoost(void) const593     double GetLocalResFreqBoost(void) const {return m_LocalResFreqBoost;}
594 
595 
596     //--- Pairwise alignment
597 
598     /// Set alignment socre matrix name
599     /// @param matrix Score matrix name [in]
600     ///
SetScoreMatrixName(const string & matrix)601     void SetScoreMatrixName(const string& matrix)
602     {m_MatrixName = matrix; m_Mode = fNonStandard;}
603 
604     /// Get alignment score matrix name
605     /// @return Score matrix name
606     ///
GetScoreMatrixName(void) const607     string GetScoreMatrixName(void) const {return m_MatrixName;}
608 
609     /// Set gap opening penalty for middle gaps in pairwise global alignment
610     /// of profiles
611     /// @param penalty Gap open penalty [in]
612     ///
SetGapOpenPenalty(TScore penalty)613     void SetGapOpenPenalty(TScore penalty)
614     {m_GapOpen = penalty; m_Mode = fNonStandard;}
615 
616     /// Get gap opening penalty for middle gaps in pairwise global alignment
617     /// of profiles
618     /// @return Gap open penalty
619     ///
GetGapOpenPenalty(void) const620     TScore GetGapOpenPenalty(void) const {return m_GapOpen;}
621 
622     /// Set gap extension penalty for middle gaps in pairwise global alignment
623     /// of profiles
624     /// @param penalty Gap extension penalty [in]
625     ///
SetGapExtendPenalty(TScore penalty)626     void SetGapExtendPenalty(TScore penalty)
627     {m_GapExtend = penalty; m_Mode = fNonStandard;}
628 
629     /// Get gap extension penlaty for middle gaps in pairwise global alignment
630     /// of profiles
631     /// @return Gap extension penalty
632     ///
GetGapExtendPenalty(void) const633     TScore GetGapExtendPenalty(void) const {return m_GapExtend;}
634 
635     /// Set gap opening penalty for end gaps in pairwise global alignment
636     /// of profiles
637     /// @param penalty Gap open penalty [in]
638     ///
SetEndGapOpenPenalty(TScore penalty)639     void SetEndGapOpenPenalty(TScore penalty)
640     {m_EndGapOpen = penalty; m_Mode = fNonStandard;}
641 
642     /// Get gap opening penalty for end gaps in pairwise global alignment
643     /// of profiles
644     /// @return Gap open penalty
645     ///
GetEndGapOpenPenalty(void) const646     TScore GetEndGapOpenPenalty(void) const {return m_EndGapOpen;}
647 
648     /// Set gap extension penalty for end gaps in pairwise global alignment
649     /// of profiles
650     /// @param penalty Gap extension penalty [in]
651     ///
SetEndGapExtendPenalty(TScore penalty)652     void SetEndGapExtendPenalty(TScore penalty)
653     {m_EndGapExtend = penalty; m_Mode = fNonStandard;}
654 
655     /// Get gap extension penalty for end gaps in pairwise global alignment
656     /// of profiles
657     /// @return Gap extension penalty
658     ///
GetEndGapExtendPenalty(void) const659     TScore GetEndGapExtendPenalty(void) const {return m_EndGapExtend;}
660 
661 
662     //--- Other ---
663 
664     /// Get options mode
665     /// @return Options mode
666     ///
GetMode(void) const667     TMode GetMode(void) const {return m_Mode;}
668 
669     /// Check whether parameter values belong to any of the standard modes
670     ///
671     /// The mode is standard when parameters are not changed after the options
672     /// object is created, with exception of RPS database name.
673     /// @return
674     ///     True if parameter values belong to a stanard mode,
675     ///     False otherwise
676     ///
IsStandardMode(void) const677     bool IsStandardMode(void) const {return !(m_Mode & fNonStandard);}
678 
679     /// Set verbose mode
680     ///
681     /// If set, intermidiate results will be provided in stdout
682     /// @param verbose Verbose mode set if true, not set otherwise [in]
683     ///
SetVerbose(bool verbose)684     void SetVerbose(bool verbose) {m_Verbose = verbose;}
685 
686     /// Get verbose mode
687     ///
688     /// If set, intermidiate results will be provided in stdout
689     /// @return
690     ///   - true if verbose mode set
691     ///   - false otherwise
GetVerbose(void) const692     bool GetVerbose(void) const {return m_Verbose;}
693 
SetInClustAlnMethod(EInClustAlnMethod method)694     void SetInClustAlnMethod(EInClustAlnMethod method)
695     {m_InClustAlnMethod = method;}
696 
GetInClustAlnMethod(void) const697     EInClustAlnMethod GetInClustAlnMethod(void) const
698     {return m_InClustAlnMethod;}
699 
700     /// Set pre-computed domain hits
701     /// @param archive Blast4 archive with precomputed domain hits [in]
702     ///
SetDomainHits(CConstRef<objects::CBlast4_archive> archive)703     void SetDomainHits(CConstRef<objects::CBlast4_archive> archive)
704     {m_DomainHits = archive;}
705 
706     /// Get pre-computed domain hits
707     /// @return Blast4 archive with pre-computed domain hits
708     ///
GetDomainHits(void) const709     CConstRef<objects::CBlast4_archive> GetDomainHits(void) const
710     {return m_DomainHits;}
711 
712     /// Are pre-computed domain hits set
713     /// @return true if pre-computed domain hits are set, false otherwise
714     ///
CanGetDomainHits(void) const715     bool CanGetDomainHits(void) const
716     {return !m_DomainHits.Empty();}
717 
718     /// Check if fast alignment is to be used
719     ///
720     /// Fast alignment means that constraints will be used instead of profile-
721     /// profile alignment
722     /// @return If true, fast alignment will be used, regural otherwise
723     ///
GetFastAlign(void) const724     bool GetFastAlign(void) const {return m_FastAlign;}
725 
726     /// Turn fast alignment method on/off
727     ///
728     /// Fast alignment means that constraints will be used instead of profile-
729     /// profile alignment
730     /// @param Fast alignment will be used if true [in]
731     ///
SetFastAlign(bool f)732     void SetFastAlign(bool f) {m_FastAlign = f; m_Mode = fNonStandard;}
733 
734     /// Get central sequence
735     ///
736     /// This is used for fast alignment of BLAST results. One sequence
737     /// (typically BLAST query) is made artificially similar to all other
738     /// sequences to make fast alignment more agreeable with BLAST alignments.
739     /// @param Zero-based index of the central sequence [in]
740     ///
GetCentralSeq(void) const741     int GetCentralSeq(void) const {return m_CentralSeq;}
742 
743     /// Set central sequence
744     ///
745     /// This is used for fast alignment of BLAST results. One sequence
746     /// (typically BLAST query) is made artificially similar to all other
747     /// sequences to make fast alignment more agreeable with BLAST alignments.
748     /// @return Zero-based index of the central sequence
749     ///
SetCentralSeq(int c)750     void SetCentralSeq(int c) {m_CentralSeq = c;}
751 
752 
753     //--- Options validation ---
754 
755     /// Validate parameter values
756     /// @return True if parameters valid, false otherwise
757     ///
758     bool Validate(void);
759 
760     /// Get warning messages
761     /// @return Warning messages
762     ///
GetMessages(void)763     const vector<string>& GetMessages(void) {return m_Messages;}
764 
765 private:
766 
767     /// Forbidding copy constructor
768     CMultiAlignerOptions(const CMultiAlignerOptions&);
769 
770     /// Forbidding assignment operator
771     CMultiAlignerOptions& operator=(const CMultiAlignerOptions&);
772 
773     /// Initiate parameter values based on the specified mode
774     /// @param mode Mode
775     void x_InitParams(TMode mode);
776 
777 
778 private:
779 
780     TMode m_Mode;
781 
782     // Query clustering
783     bool m_UseQueryClusters;
784     TKMethods::ECompressedAlphabet m_KmerAlphabet;
785     unsigned int m_KmerLength;
786     double m_MaxInClusterDist;
787     TKMethods::EDistMeasures m_ClustDistMeasure;
788     EInClustAlnMethod m_InClustAlnMethod;
789     int m_CentralSeq;
790 
791     // RPS Blast
792     string m_RpsDb;
793     double m_RpsEvalue;
794     int m_DomainHitlistSize;
795     double m_DomainResFreqBoost;
796     bool m_UsePreRpsHits;
797 
798     // Blastp
799     double m_BlastpEvalue;
800 
801     // Patterns
802     vector<CPattern> m_Patterns;
803 
    // Iterative alignment
805     bool m_Iterate;
806     double m_ConservedCutoff;
807     double m_Pseudocount;
808 
809     // Realign MSA for different progressive alignment tree rooting
810     bool m_Realign;
811 
812     // Skip profile-profile alignment whenever possible and use information
813     // from constraints to align sequences and profiles
814     bool m_FastAlign;
815 
816     // User constraints
817     TConstraints m_UserHits;
818     int m_UserHitsScore;
819 
820     // Progressive alignment
821     ETreeMethod m_TreeMethod;
822     double m_LocalResFreqBoost;
823 
824     // Pairwise alignment
825     string m_MatrixName;
826     TScore m_GapOpen;
827     TScore m_GapExtend;
828     TScore m_EndGapOpen;
829     TScore m_EndGapExtend;
830 
831     // Pre-computed hits
832     CConstRef<objects::CBlast4_archive> m_DomainHits;
833 
834     bool m_Verbose;
835 
836     vector<string> m_Messages;
837 
838     static const int kDefaultUserConstraintsScore = 1000000;
839 };
840 
841 END_SCOPE(cobalt)
842 END_NCBI_SCOPE
843 
844 #endif /* ALGO_COBALT___COBALT_OPTIONS__HPP */
845