1 #ifndef ALGO_COBALT___COBALT_OPTIONS__HPP 2 #define ALGO_COBALT___COBALT_OPTIONS__HPP 3 4 /* $Id: options.hpp 463771 2015-04-01 15:14:35Z boratyng $ 5 * =========================================================================== 6 * 7 * PUBLIC DOMAIN NOTICE 8 * National Center for Biotechnology Information 9 * 10 * This software/database is a "United States Government Work" under the 11 * terms of the United States Copyright Act. It was written as part of 12 * the author's offical duties as a United States Government employee and 13 * thus cannot be copyrighted. This software/database is freely available 14 * to the public for use. The National Library of Medicine and the U.S. 15 * Government have not placed any restriction on its use or reproduction. 16 * 17 * Although all reasonable efforts have been taken to ensure the accuracy 18 * and reliability of the software and data, the NLM and the U.S. 19 * Government do not and cannot warrant the performance or results that 20 * may be obtained by using this software or data. The NLM and the U.S. 21 * Government disclaim all warranties, express or implied, including 22 * warranties of performance, merchantability or fitness for any particular 23 * purpose. 24 * 25 * Please cite the author in any work or product based on this material. 26 * 27 * ===========================================================================*/ 28 29 /***************************************************************************** 30 31 File name: options.hpp 32 33 Author: Greg Boratyn 34 35 Contents: Interface for CMultiAlignerOptions 36 37 ******************************************************************************/ 38 39 40 /// @file options.hpp 41 /// Options for CMultiAligner 42 43 #include <corelib/ncbiobj.hpp> 44 #include <algo/cobalt/kmercounts.hpp> 45 #include <algo/align/nw/nw_pssm_aligner.hpp> 46 #include <objects/blast/Blast4_archive.hpp> 47 48 /// Default values for cobalt parameters 49 /// Rps-Blast e-value cutoff for creating contraints 50 #define COBALT_RPS_EVALUE 0.003 51 /// Weight for domain residue frequecies when creating MSA profiles 52 #define COBALT_DOMAIN_BOOST 0.5 53 /// Hitlist size for Rps-Blast searches 54 #define COBALT_DOMAIN_HITLIST_SIZE 500 55 56 /// Blastp e-value cutoff for creating contraints 57 #define COBALT_BLAST_EVALUE 0.005 58 /// Weight for sequence residues when creating MSA profules 59 #define COBALT_LOCAL_BOOST 1.0 60 61 /// Pseudocount constant used in multiple alignment 62 #define COBALT_PSEUDO_COUNT 2.0 63 /// Conservation score cutoff used for selecting conserved columns in 64 /// initial MSA 65 #define COBALT_CONSERVED_CUTOFF 0.67 66 67 /// Default method for computing progressive alignment tree 68 #define COBALT_TREE_METHOD CMultiAlignerOptions::eClusters 69 70 /// Default substitution matrix used in multiple alignment 71 #define COBALT_DEFAULT_MATRIX "BLOSUM62" 72 /// End gap opening score 73 #define COBALT_END_GAP_OPEN -5 74 /// End gap extension score 75 #define COBALT_END_GAP_EXTNT -1 76 /// Gap opening score 77 #define COBALT_GAP_OPEN -11 78 /// Gap extension score 79 #define COBALT_GAP_EXTNT -1 80 81 /// Maximum cluster diameter for pre-alignment sequence clustering 82 #define COBALT_MAX_CLUSTER_DIAM 0.8 83 /// K-mer length for sequence clustering 84 #define COBALT_KMER_LEN 4 85 /// K-mer alphabet for sequence clustering 86 #define COBALT_KMER_ALPH CMultiAlignerOptions::TKMethods::eSE_B15 87 88 BEGIN_NCBI_SCOPE 89 BEGIN_SCOPE(cobalt) 90 91 92 /// Options and parameters for multiple alignement 93 /// 94 class NCBI_COBALT_EXPORT CMultiAlignerOptions : public CObject 95 { 96 97 public: 98 typedef CNWAligner::TScore TScore; 99 typedef TKmerMethods<CSparseKmerCounts> TKMethods; 100 101 /// Representation of CDD pattern 102 /// 103 /// Pattern is represented either as string or pointer in order 104 /// to avoid copying large blocks if patterns are already in memory. 105 /// Representation is selected by the use of constructor. String 106 /// constructor creates a copy of the argument, pointer one does not. 107 class CPattern 108 { 109 public: 110 111 /// Create empty pattern CPattern(void)112 CPattern(void) 113 : m_Pattern((char*)NULL), m_IsPointer(true) {} 114 115 /// Create pattern as pointer. Referenced memory is not copied. 116 /// @param pattern Pattern CPattern(char * pattern)117 CPattern(char* pattern) 118 : m_Pattern(pattern), m_IsPointer(true) {} 119 120 /// Create pattern as string. The argument is copied. 121 /// @param pattern Pattern CPattern(const string & pattern)122 CPattern(const string& pattern) 123 : m_Pattern(pattern), m_IsPointer(false) {} 124 125 /// Create copy of a pattern 126 /// @param pattern Pattern CPattern(const CPattern & pattern)127 CPattern(const CPattern& pattern) 128 { 129 if (pattern.m_IsPointer) { 130 m_Pattern.pointer = pattern.m_Pattern.pointer; 131 } 132 else { 133 m_Pattern.str = pattern.m_Pattern.str; 134 } 135 m_IsPointer = pattern.m_IsPointer; 136 } 137 138 /// Assignment operator 139 /// @param pattern Pattern operator =(const CPattern & pattern)140 CPattern& operator=(const CPattern& pattern) 141 { 142 if (pattern.m_IsPointer) { 143 m_Pattern.pointer = pattern.m_Pattern.pointer; 144 } 145 else { 146 m_Pattern.str = pattern.m_Pattern.str; 147 } 148 m_IsPointer = pattern.m_IsPointer; 149 150 return *this; 151 } 152 153 /// Get pattern as pointer 154 /// @return Pointer to a pattern AsPointer(void) const155 const char* AsPointer(void) const 156 {return (m_IsPointer ? m_Pattern.pointer 157 : m_Pattern.str.c_str());} 158 159 /// Get a copy of a pattern as string 160 /// @return Copy of pattern AsString(void) const161 string AsString(void) const 162 {return (m_IsPointer ? (string)m_Pattern.pointer : m_Pattern.str);} 163 164 /// Check if pattern is stored as pointer 165 /// @return 166 /// True if pattern is stored as pointer, 167 /// False otherwise IsPointer(void) const168 bool IsPointer(void) const {return m_IsPointer;} 169 170 /// Check if pattern is empty 171 /// @return 172 /// True if patter is empty, 173 /// False otherwise IsEmpty(void) const174 bool IsEmpty(void) const 175 {return m_IsPointer ? !m_Pattern.pointer : m_Pattern.str.empty();} 176 177 private: 178 struct SPattern { 179 char* pointer; 180 string str; 181 SPatternCMultiAlignerOptions::CPattern::SPattern182 SPattern(void) : pointer(NULL) {} SPatternCMultiAlignerOptions::CPattern::SPattern183 SPattern(char* ptr) : pointer(ptr) {} SPatternCMultiAlignerOptions::CPattern::SPattern184 SPattern(string s) : pointer(NULL), str(s) {} 185 }; 186 187 SPattern m_Pattern; 188 bool m_IsPointer; 189 }; 190 191 192 /// Structure for representing single user constraint for pair-wise 193 /// alignment 194 struct SConstraint { 195 int seq1_index; 196 int seq1_start; 197 int seq1_stop; 198 199 int seq2_index; 200 int seq2_start; 201 int seq2_stop; 202 203 /// Create empty constraint 204 /// SConstraintCMultiAlignerOptions::SConstraint205 SConstraint(void) : seq1_index(-1), seq2_index(-1) {} 206 207 /// Create constraint for given sequences and locations 208 /// @param ind1 Index of sequence 1 in query array 209 /// @param start1 Start location for sequence 1 210 /// @param end1 End location for sequence 1 211 /// @param ind2 Index of sequence 2 in query array 212 /// @param start2 Start location for sequence 2 213 /// @param end2 End location for sequence 2 SConstraintCMultiAlignerOptions::SConstraint214 SConstraint(int ind1, int start1, int end1, int ind2, int start2, 215 int end2) 216 : seq1_index(ind1), seq1_start(start1), seq1_stop(end1), 217 seq2_index(ind2), seq2_start(start2), seq2_stop(end2) 218 {} 219 220 }; 221 222 typedef vector<SConstraint> TConstraints; 223 224 /// Mode of multi aligner setings. Values can be combined. 225 enum EMode { 226 227 // Qyery clusters 228 fNoQueryClusters = 1, ///< No query clustering 229 230 // RPS Blast search 231 fNoRpsBlast = 1<<2, ///< Do not use RPS Blast 232 233 // Regular expression patterns search 234 fNoPatterns = 1<<3, ///< Do not use conserved domain patterns 235 236 // Iterative alignment 237 fNoIterate = 1<<4, ///< Do not use Iterative alignment 238 239 fNoRealign = 1<<5, ///< Do not realign with different tree root 240 241 fFastAlign = 1<<6, ///< Do Fast and rough profile-profile alignment 242 243 /// Set options for very fast alignment (speed over accuracy) 244 fFast = fNoRpsBlast | fNoIterate | fNoRealign | fFastAlign, 245 246 // Other 247 fNonStandard = 1<<7 ///< Not used as input, indicates that 248 ///< non-standard settings were selected after 249 250 }; 251 252 typedef int TMode; 253 254 /// Default options mode 255 static const TMode kDefaultMode = 0; 256 257 /// Method for construction of guide tree for progressive alignment 258 enum ETreeMethod { 259 eNJ = 0, ///< Neighbot Joining 260 eFastME, ///< Fast Minimum Evolution 261 eClusters ///< Clustering dendrogram 262 }; 263 264 enum EInClustAlnMethod { 265 eNone = 0, ///< No clustering 266 eToPrototype, ///< All cluster elements are aligner to cluster 267 ///< prototype 268 269 eMulti ///< Alignment guide tree for each cluster is attached 270 ///< to the main alignment guide tree 271 }; 272 273 public: 274 275 /// Create options with default mode 276 /// 277 CMultiAlignerOptions(void); 278 279 /// Create options with desired mode 280 /// @param mode Desired mode of operation 281 /// 282 explicit CMultiAlignerOptions(TMode mode); 283 284 /// Create options with RPS database and desired mode 285 /// @param rps_db_name Name of RPS database 286 /// @param mode Mode of operation 287 /// 288 CMultiAlignerOptions(const string& rps_db_name, TMode mode = kDefaultMode); 289 290 291 // Turn on and off major options and set major parameters. Other parameter 292 // are set to defaults. 293 294 /// Set use of query clustering option 295 /// 296 /// If the option in set on, query sequences will be clustered. Each cluster 297 /// Will be aligned independently without searching for conserved domains 298 /// (RPS Blast) and local hist (Blastp). Multiple alignment will be 299 /// performed on cluster profiles. Parameters of clustering procedure are 300 /// set base on EMode value and can be chanded with expert functions. 301 /// @param use Option used if true [in] 302 /// SetUseQueryClusters(bool use)303 void SetUseQueryClusters(bool use) 304 {m_UseQueryClusters = use; m_Mode = fNonStandard;} 305 306 307 /// Check if query clustering option is on 308 /// @return 309 /// - True if query clustering option is on, 310 /// - False otherwise 311 /// GetUseQueryClusters(void) const312 bool GetUseQueryClusters(void) const {return m_UseQueryClusters;} 313 314 315 /// Set use of iterative alignment option 316 /// 317 /// After initial multiple alignment is done, conserved columns will be 318 /// found and sequences will be re-aligned using this information. Default 319 /// parameters will be used. Iterative alignment parameters can be changed 320 /// with expert functions. 321 /// @param use Option used if true, otherwise not used [in] 322 /// SetIterate(bool use)323 void SetIterate(bool use) 324 {m_Iterate = use; m_Mode = fNonStandard;} 325 326 /// Set realigning MSA with different root nodes in the progressive 327 /// alignment tree 328 /// @param r Do realignment if true [in] 329 /// SetRealign(bool r)330 void SetRealign(bool r) 331 {m_Realign = r; m_Mode = fNonStandard;} 332 333 /// Check if iterative alignmnet option is used 334 /// @return 335 /// - True if iterative alignment option is on, 336 /// - False otherwise 337 /// GetIterate(void) const338 bool GetIterate(void) const {return m_Iterate;} 339 340 /// Check if MSA is to be realigned for different rooting of progressive 341 /// alignment tree 342 /// @return True if MSA is to be realigned, false otherwise 343 /// GetRealign(void) const344 bool GetRealign(void) const {return m_Realign;} 345 346 347 /// Use RPS Blast with given database 348 /// 349 /// RPS Blast will be used to find conserved domains in query sequences. 350 /// Pairwise alignments are constrainted so that matching conserved domains 351 /// are aligned. 352 /// Default RPS Blast parameters will be used. RPS Blast parameters can be 353 /// changed with expetr functions. 354 /// @param dbname Path and name of RPS Blast data base [in] 355 /// SetRpsDb(const string & dbname)356 void SetRpsDb(const string& dbname) 357 {m_RpsDb = dbname; m_Mode = m_Mode & ~fNoRpsBlast;} 358 359 360 /// Get RPS Blast data base name 361 /// @return RPS Blast data base name 362 /// GetRpsDb(void) const363 string GetRpsDb(void) const {return m_RpsDb;} 364 365 366 /// Determine if RPS Blast is to be used 367 /// @return True if RPS Blast will be used, false otherwise 368 /// GetUseRpsBlast(void) const369 bool GetUseRpsBlast(void) const {return !m_RpsDb.empty();} 370 371 372 /// Set regular expression patterns for identification of conserved domains. 373 /// Patterns are not freed when object is deleted. 374 /// 375 /// Regular expresion patterns will be used to find conserved domains. 376 /// Pairwise alignmnents will be constained to so that matching conserved 377 /// domains are aligned. Parameter ownership is transfered to options. 378 /// @return Reference to the list of conserved domain patterns 379 /// SetCddPatterns(void)380 vector<CPattern>& SetCddPatterns(void) 381 {m_Mode = fNonStandard; return m_Patterns;} 382 383 384 /// Set default patterns for identification of conserved domains. 385 /// 386 /// Regular expresion patterns will be used to find conserved domains. 387 /// Pairwise alignmnents will be constained to so that matching conserved 388 /// domains are aligned. Parameter ownership is transfered to options. 389 /// 390 void SetDefaultCddPatterns(void); 391 392 393 /// Get regular expression patterns for identification of conserved domains 394 /// @return List of conserved domain patterns 395 /// GetCddPatterns(void) const396 const vector<CPattern>& GetCddPatterns(void) const {return m_Patterns;} 397 398 399 /// Set user constraints. 400 /// 401 /// The constraits are used in progressive alignment. 402 /// @return Reference to the list of user constraints 403 /// SetUserConstraints(void)404 TConstraints& SetUserConstraints(void) 405 {m_Mode = fNonStandard; return m_UserHits;} 406 407 408 /// Get user constraints 409 /// @return User constraits 410 /// GetUserConstraints(void) const411 const TConstraints& GetUserConstraints(void) const 412 {return m_UserHits;} 413 414 415 /// Set score for user alignment constraints 416 /// @param score Score 417 /// SetUserConstraintsScore(int score)418 void SetUserConstraintsScore(int score) 419 {m_UserHitsScore = score; m_Mode = fNonStandard;} 420 421 422 /// Get score for user alignment constraints 423 /// @return Score for user alignmnet constraints 424 /// GetUserConstraintsScore(void) const425 int GetUserConstraintsScore(void) const {return m_UserHitsScore;} 426 427 428 //--- Expert Methods --- 429 430 //--- Query clustering --- 431 432 /// Set word size for creating word count vectors in query clustering 433 /// @param len Number of letters in a word [in] 434 /// SetKmerLength(int len)435 void SetKmerLength(int len) {m_KmerLength = len; m_Mode = fNonStandard;} 436 437 /// Get word size for creating word count vectors 438 /// @return Number of letters in a word 439 /// GetKmerLength(void) const440 int GetKmerLength(void) const {return m_KmerLength;} 441 442 /// Set alphabet for creating word count vectors 443 /// @param alph Alphabet [in] 444 /// SetKmerAlphabet(TKMethods::ECompressedAlphabet alph)445 void SetKmerAlphabet(TKMethods::ECompressedAlphabet alph) 446 {m_KmerAlphabet = alph; m_Mode = fNonStandard;} 447 448 /// Get alphabet used for creating word count vectors 449 /// @return Alphabet 450 /// GetKmerAlphabet(void) const451 TKMethods::ECompressedAlphabet GetKmerAlphabet(void) const 452 {return m_KmerAlphabet;} 453 454 /// Set measure for computing distance between word count vectors 455 /// @param method Distance method [in] 456 /// SetKmerDistMeasure(TKMethods::EDistMeasures method)457 void SetKmerDistMeasure(TKMethods::EDistMeasures method) 458 {m_ClustDistMeasure = method; m_Mode = fNonStandard;} 459 460 461 /// Get method for computing distance between word count vectors 462 /// @return Distance method 463 /// GetKmerDistMeasure(void) const464 TKMethods::EDistMeasures GetKmerDistMeasure(void) const 465 {return m_ClustDistMeasure;} 466 467 468 /// Set maximum allowed distance between sequences in a cluster 469 /// @param dist Maximum allowed distance in cluster [in] 470 /// SetMaxInClusterDist(double dist)471 void SetMaxInClusterDist(double dist) 472 {m_MaxInClusterDist = dist; m_Mode = fNonStandard;} 473 474 /// Get maximum allowed distance between sequences in a cluster 475 /// @return Maxium allowed distance in cluster 476 /// GetMaxInClusterDist(void) const477 double GetMaxInClusterDist(void) const {return m_MaxInClusterDist;} 478 479 480 //--- RPS Blast --- 481 482 /// Set e-value threshold for accepting RPS Blast hits 483 /// @param evalue E-value for acceting RPS Blast hits [in] 484 /// SetRpsEvalue(double evalue)485 void SetRpsEvalue(double evalue) 486 {m_RpsEvalue = evalue; m_Mode = fNonStandard;} 487 488 /// Get e-value threshold for accepting RPS Blast hits 489 /// @return E-value for accepting RPS Blast hits 490 /// GetRpsEvalue(void) const491 double GetRpsEvalue(void) const {return m_RpsEvalue;} 492 493 /// Set hitlist size (per sequence) for domain search 494 /// @param size Hitlist size [in] 495 /// SetDomainHitlistSize(int size)496 void SetDomainHitlistSize(int size) 497 {m_DomainHitlistSize = size; m_Mode = fNonStandard;} 498 499 /// Get hitlist size (per sequence) for domain searches 500 /// @return Hitlist size for domain searches 501 /// GetDomainHitlistSize(void) const502 int GetDomainHitlistSize(void) const {return m_DomainHitlistSize;} 503 504 /// Set boost for residue frequencies in conserved domains from RPS data 505 /// base 506 /// @param boost Boost for RPS residue frequencies [in] 507 /// SetDomainResFreqBoost(double boost)508 void SetDomainResFreqBoost(double boost) 509 {m_DomainResFreqBoost = boost; m_Mode = fNonStandard;} 510 511 /// Get boost for residue frequencies in conserved domains from RPS data 512 /// base 513 /// @return Boost for RPS residue frequencies 514 /// GetDomainResFreqBoost(void) const515 double GetDomainResFreqBoost(void) const {return m_DomainResFreqBoost;} 516 517 /// Set use of precomputed RPS Blast hits 518 /// @param use SetUsePreRpsHits(bool use)519 void SetUsePreRpsHits(bool use) 520 {m_UsePreRpsHits = use; m_Mode = fNonStandard;} 521 522 /// Get use of precomputed RPS Blast hits 523 /// @return 524 /// - True if use precomputed RPS hits is on, 525 /// - False otherwise 526 /// GetUsePreRpsHits(void) const527 bool GetUsePreRpsHits(void) const {return m_UsePreRpsHits;} 528 529 530 //--- Blastp --- 531 532 /// Set e-value for accepting Blastp hits 533 /// @param evalue E-value for accepting Blastp hits [in] 534 /// SetBlastpEvalue(double evalue)535 void SetBlastpEvalue(double evalue) 536 {m_BlastpEvalue = evalue; m_Mode = fNonStandard;} 537 538 /// Get e-value for accepting Blastp hits 539 /// @return E-value for accepting Blastp hits 540 /// GetBlastpEvalue(void) const541 double GetBlastpEvalue(void) const {return m_BlastpEvalue;} 542 543 544 //--- Iterative alignment 545 546 /// Set cutoff score for conserved aligned columns 547 /// @param score Cutoff score [in] 548 /// SetConservedCutoffScore(double score)549 void SetConservedCutoffScore(double score) 550 {m_ConservedCutoff = score; m_Mode = fNonStandard;} 551 552 /// Get cutoff score for conserved aligned columns 553 /// @return Cutoff score 554 /// GetConservedCutoffScore(void) const555 double GetConservedCutoffScore(void) const {return m_ConservedCutoff;} 556 557 /// Set pseudocount for calculating column entropy 558 /// @param pseudocount Pseudocount [in] 559 /// SetPseudocount(double pseudocount)560 void SetPseudocount(double pseudocount) 561 {m_Pseudocount = pseudocount; m_Mode = fNonStandard;} 562 563 /// Get pseudocount for calculating column entropy 564 /// @return Pseudocount 565 /// GetPseudocount(void) const566 double GetPseudocount(void) const {return m_Pseudocount;} 567 568 569 //--- Progressive alignment 570 571 /// Set method for creating tree that guides progressive alignment 572 /// @param method Tree computation method [in] 573 /// SetTreeMethod(ETreeMethod method)574 void SetTreeMethod(ETreeMethod method) 575 {m_TreeMethod = method; m_Mode = fNonStandard;} 576 577 /// Get method for creating tree that guides progressive alignment 578 /// @return Tree method 579 /// GetTreeMethod(void) const580 ETreeMethod GetTreeMethod(void) const {return m_TreeMethod;} 581 582 /// Set frequency boost for a letter that appears in query sequence in 583 /// given position 584 /// @param boost Frequency boost [in] 585 /// SetLocalResFreqBoost(double boost)586 void SetLocalResFreqBoost(double boost) 587 {m_LocalResFreqBoost = boost; m_Mode =fNonStandard;} 588 589 /// Get frequency boost for a letter that appears in query sequence in 590 /// given position 591 /// @return Frequency boost 592 /// GetLocalResFreqBoost(void) const593 double GetLocalResFreqBoost(void) const {return m_LocalResFreqBoost;} 594 595 596 //--- Pairwise alignment 597 598 /// Set alignment socre matrix name 599 /// @param matrix Score matrix name [in] 600 /// SetScoreMatrixName(const string & matrix)601 void SetScoreMatrixName(const string& matrix) 602 {m_MatrixName = matrix; m_Mode = fNonStandard;} 603 604 /// Get alignment score matrix name 605 /// @return Score matrix name 606 /// GetScoreMatrixName(void) const607 string GetScoreMatrixName(void) const {return m_MatrixName;} 608 609 /// Set gap opening penalty for middle gaps in pairwise global alignment 610 /// of profiles 611 /// @param penalty Gap open penalty [in] 612 /// SetGapOpenPenalty(TScore penalty)613 void SetGapOpenPenalty(TScore penalty) 614 {m_GapOpen = penalty; m_Mode = fNonStandard;} 615 616 /// Get gap opening penalty for middle gaps in pairwise global alignment 617 /// of profiles 618 /// @return Gap open penalty 619 /// GetGapOpenPenalty(void) const620 TScore GetGapOpenPenalty(void) const {return m_GapOpen;} 621 622 /// Set gap extension penalty for middle gaps in pairwise global alignment 623 /// of profiles 624 /// @param penalty Gap extension penalty [in] 625 /// SetGapExtendPenalty(TScore penalty)626 void SetGapExtendPenalty(TScore penalty) 627 {m_GapExtend = penalty; m_Mode = fNonStandard;} 628 629 /// Get gap extension penlaty for middle gaps in pairwise global alignment 630 /// of profiles 631 /// @return Gap extension penalty 632 /// GetGapExtendPenalty(void) const633 TScore GetGapExtendPenalty(void) const {return m_GapExtend;} 634 635 /// Set gap opening penalty for end gaps in pairwise global alignment 636 /// of profiles 637 /// @param penalty Gap open penalty [in] 638 /// SetEndGapOpenPenalty(TScore penalty)639 void SetEndGapOpenPenalty(TScore penalty) 640 {m_EndGapOpen = penalty; m_Mode = fNonStandard;} 641 642 /// Get gap opening penalty for end gaps in pairwise global alignment 643 /// of profiles 644 /// @return Gap open penalty 645 /// GetEndGapOpenPenalty(void) const646 TScore GetEndGapOpenPenalty(void) const {return m_EndGapOpen;} 647 648 /// Set gap extension penalty for end gaps in pairwise global alignment 649 /// of profiles 650 /// @param penalty Gap extension penalty [in] 651 /// SetEndGapExtendPenalty(TScore penalty)652 void SetEndGapExtendPenalty(TScore penalty) 653 {m_EndGapExtend = penalty; m_Mode = fNonStandard;} 654 655 /// Get gap extension penalty for end gaps in pairwise global alignment 656 /// of profiles 657 /// @return Gap extension penalty 658 /// GetEndGapExtendPenalty(void) const659 TScore GetEndGapExtendPenalty(void) const {return m_EndGapExtend;} 660 661 662 //--- Other --- 663 664 /// Get options mode 665 /// @return Options mode 666 /// GetMode(void) const667 TMode GetMode(void) const {return m_Mode;} 668 669 /// Check whether parameter values belong to any of the standard modes 670 /// 671 /// The mode is standard when parameters are not changed after the options 672 /// object is created, with exception of RPS database name. 673 /// @return 674 /// True if parameter values belong to a stanard mode, 675 /// False otherwise 676 /// IsStandardMode(void) const677 bool IsStandardMode(void) const {return !(m_Mode & fNonStandard);} 678 679 /// Set verbose mode 680 /// 681 /// If set, intermidiate results will be provided in stdout 682 /// @param verbose Verbose mode set if true, not set otherwise [in] 683 /// SetVerbose(bool verbose)684 void SetVerbose(bool verbose) {m_Verbose = verbose;} 685 686 /// Get verbose mode 687 /// 688 /// If set, intermidiate results will be provided in stdout 689 /// @return 690 /// - true if verbose mode set 691 /// - false otherwise GetVerbose(void) const692 bool GetVerbose(void) const {return m_Verbose;} 693 SetInClustAlnMethod(EInClustAlnMethod method)694 void SetInClustAlnMethod(EInClustAlnMethod method) 695 {m_InClustAlnMethod = method;} 696 GetInClustAlnMethod(void) const697 EInClustAlnMethod GetInClustAlnMethod(void) const 698 {return m_InClustAlnMethod;} 699 700 /// Set pre-computed domain hits 701 /// @param archive Blast4 archive with precomputed domain hits [in] 702 /// SetDomainHits(CConstRef<objects::CBlast4_archive> archive)703 void SetDomainHits(CConstRef<objects::CBlast4_archive> archive) 704 {m_DomainHits = archive;} 705 706 /// Get pre-computed domain hits 707 /// @return Blast4 archive with pre-computed domain hits 708 /// GetDomainHits(void) const709 CConstRef<objects::CBlast4_archive> GetDomainHits(void) const 710 {return m_DomainHits;} 711 712 /// Are pre-computed domain hits set 713 /// @return true if pre-computed domain hits are set, false otherwise 714 /// CanGetDomainHits(void) const715 bool CanGetDomainHits(void) const 716 {return !m_DomainHits.Empty();} 717 718 /// Check if fast alignment is to be used 719 /// 720 /// Fast alignment means that constraints will be used instead of profile- 721 /// profile alignment 722 /// @return If true, fast alignment will be used, regural otherwise 723 /// GetFastAlign(void) const724 bool GetFastAlign(void) const {return m_FastAlign;} 725 726 /// Turn fast alignment method on/off 727 /// 728 /// Fast alignment means that constraints will be used instead of profile- 729 /// profile alignment 730 /// @param Fast alignment will be used if true [in] 731 /// SetFastAlign(bool f)732 void SetFastAlign(bool f) {m_FastAlign = f; m_Mode = fNonStandard;} 733 734 /// Get central sequence 735 /// 736 /// This is used for fast alignment of BLAST results. One sequence 737 /// (typically BLAST query) is made artificially similar to all other 738 /// sequences to make fast alignment more agreeable with BLAST alignments. 739 /// @param Zero-based index of the central sequence [in] 740 /// GetCentralSeq(void) const741 int GetCentralSeq(void) const {return m_CentralSeq;} 742 743 /// Set central sequence 744 /// 745 /// This is used for fast alignment of BLAST results. One sequence 746 /// (typically BLAST query) is made artificially similar to all other 747 /// sequences to make fast alignment more agreeable with BLAST alignments. 748 /// @return Zero-based index of the central sequence 749 /// SetCentralSeq(int c)750 void SetCentralSeq(int c) {m_CentralSeq = c;} 751 752 753 //--- Options validation --- 754 755 /// Validate parameter values 756 /// @return True if parameters valid, false otherwise 757 /// 758 bool Validate(void); 759 760 /// Get warning messages 761 /// @return Warning messages 762 /// GetMessages(void)763 const vector<string>& GetMessages(void) {return m_Messages;} 764 765 private: 766 767 /// Forbidding copy constructor 768 CMultiAlignerOptions(const CMultiAlignerOptions&); 769 770 /// Forbidding assignment operator 771 CMultiAlignerOptions& operator=(const CMultiAlignerOptions&); 772 773 /// Initiate parameter values based on the specified mode 774 /// @param mode Mode 775 void x_InitParams(TMode mode); 776 777 778 private: 779 780 TMode m_Mode; 781 782 // Query clustering 783 bool m_UseQueryClusters; 784 TKMethods::ECompressedAlphabet m_KmerAlphabet; 785 unsigned int m_KmerLength; 786 double m_MaxInClusterDist; 787 TKMethods::EDistMeasures m_ClustDistMeasure; 788 EInClustAlnMethod m_InClustAlnMethod; 789 int m_CentralSeq; 790 791 // RPS Blast 792 string m_RpsDb; 793 double m_RpsEvalue; 794 int m_DomainHitlistSize; 795 double m_DomainResFreqBoost; 796 bool m_UsePreRpsHits; 797 798 // Blastp 799 double m_BlastpEvalue; 800 801 // Patterns 802 vector<CPattern> m_Patterns; 803 804 // Iterative alignmnet 805 bool m_Iterate; 806 double m_ConservedCutoff; 807 double m_Pseudocount; 808 809 // Realign MSA for different progressive alignment tree rooting 810 bool m_Realign; 811 812 // Skip profile-profile alignment whenever possible and use information 813 // from constraints to align sequences and profiles 814 bool m_FastAlign; 815 816 // User constraints 817 TConstraints m_UserHits; 818 int m_UserHitsScore; 819 820 // Progressive alignment 821 ETreeMethod m_TreeMethod; 822 double m_LocalResFreqBoost; 823 824 // Pairwise alignment 825 string m_MatrixName; 826 TScore m_GapOpen; 827 TScore m_GapExtend; 828 TScore m_EndGapOpen; 829 TScore m_EndGapExtend; 830 831 // Pre-computed hits 832 CConstRef<objects::CBlast4_archive> m_DomainHits; 833 834 bool m_Verbose; 835 836 vector<string> m_Messages; 837 838 static const int kDefaultUserConstraintsScore = 1000000; 839 }; 840 841 END_SCOPE(cobalt) 842 END_NCBI_SCOPE 843 844 #endif /* ALGO_COBALT___COBALT_OPTIONS__HPP */ 845