1 #ifndef ALGO_BLAST_GUMBEL_PARAMS__GUMBEL_PARAMS___HPP 2 #define ALGO_BLAST_GUMBEL_PARAMS__GUMBEL_PARAMS___HPP 3 4 /* $Id: gumbel_params.hpp 575325 2018-11-27 18:22:00Z ucko $ 5 * =========================================================================== 6 * 7 * PUBLIC DOMAIN NOTICE 8 * National Center for Biotechnology Information 9 * 10 * This software/database is a "United States Government Work" under the 11 * terms of the United States Copyright Act. It was written as part of 12 * the author's offical duties as a United States Government employee and 13 * thus cannot be copyrighted. This software/database is freely available 14 * to the public for use. The National Library of Medicine and the U.S. 15 * Government have not placed any restriction on its use or reproduction. 16 * 17 * Although all reasonable efforts have been taken to ensure the accuracy 18 * and reliability of the software and data, the NLM and the U.S. 19 * Government do not and cannot warrant the performance or results that 20 * may be obtained by using this software or data. The NLM and the U.S. 21 * Government disclaim all warranties, express or implied, including 22 * warranties of performance, merchantability or fitness for any particular 23 * purpose. 24 * 25 * Please cite the author in any work or product based on this material. 26 * 27 * ===========================================================================*/ 28 29 /***************************************************************************** 30 31 File name: gumbel_params.hpp 32 33 Author: Greg Boratyn, Sergey Sheetlin 34 35 Contents: Wrapper classes for real time Gumbel parameters computing code 36 37 ******************************************************************************/ 38 39 #include <corelib/ncbiobj.hpp> 40 #include <algo/blast/gumbel_params/general_score_matrix.hpp> 41 #include <vector> 42 43 44 BEGIN_NCBI_SCOPE 45 BEGIN_SCOPE(blast) 46 47 class CGumbelParamsRandDiagnostics; 48 49 50 /// Input parameters for Gumbel parameters calculation 51 /// 52 class CGumbelParamsOptions : public CObject 53 { 54 public: 55 typedef vector<double> TFrequencies; 56 57 public: 58 59 //---- Constructors ----// 60 61 /// Create empty options object 62 /// 63 CGumbelParamsOptions(void); 64 65 66 /// Create options with given score matrix and residue probabilities 67 /// @param smatrix Score matrix values [in] 68 /// @param seq1_residue_probs Residue probabilities for sequence 1 [in] 69 /// @param seq2_residue_probs Residue probabilities for sequence 2 [in] 70 /// 71 /// The score matrix can be non-symmetric 72 CGumbelParamsOptions(const CGeneralScoreMatrix* smatrix, 73 const vector<double>& seq1_residue_probs, 74 const vector<double>& seq2_residue_rpobs); 75 76 /// Copy constructor 77 /// @param options Options object [in] 78 /// 79 CGumbelParamsOptions(const CGumbelParamsOptions& options); 80 81 82 //---- Getters and Setters ----// 83 84 /// Set the value of gap opening penalty 85 /// @param g_open Gap opening penalty [in] 86 /// SetGapOpening(Int4 g_open)87 void SetGapOpening(Int4 g_open) {m_GapOpening = g_open;} 88 89 /// Get gap opening penalty 90 /// @return Gap opening penalty 91 /// GetGapOpening(void) const92 Int4 GetGapOpening(void) const {return m_GapOpening;} 93 94 /// Set gap extention penalty 95 /// @param g_exten Gap extension penalty [in] 96 /// SetGapExtension(Int4 g_exten)97 void SetGapExtension(Int4 g_exten) {m_GapExtension = g_exten;} 98 99 /// Get gap extention penalty 100 /// @return Gap extension penalty 101 /// GetGapExtension(void) const102 Int4 GetGapExtension(void) const {return m_GapExtension;} 103 104 /// Set relative error threshold for lambda parameter calculation 105 /// for gapped aligmment 106 /// @param lambda_err Relative error threshold [in] 107 /// 108 /// For gapless regime calculation accuracy for all parameters is 109 /// always 1e-06 m_LambdaAccuracy and m_KAccuary are not used. SetLambdaAccuracy(double lambda_err)110 void SetLambdaAccuracy(double lambda_err) 111 {m_LambdaAccuracy = lambda_err;} 112 113 /// Get relative error threshold for lambda parameter calculation 114 /// for gapped aligmment 115 /// @return Relative error threshold for lambda calculation 116 /// 117 /// For gapless regime calculation accuracy for all parameters is 118 /// always 1e-06 m_LambdaAccuracy and m_KAccuary are not used. GetLambdaAccuracy(void) const119 double GetLambdaAccuracy(void) const {return m_LambdaAccuracy;} 120 121 /// Set relative error threshold for K parameter calculation for gapped 122 /// alignment 123 /// @param k_err Relative error threshold [in] 124 /// 125 /// For gapless regime calculation accuracy for all parameters is 126 /// always 1e-06 m_LambdaAccuracy and m_KAccuary are not used. SetKAccuracy(double k_err)127 void SetKAccuracy(double k_err) {m_KAccuracy = k_err;} 128 129 /// Get relative error threshold for K parameter calculation for gapped 130 /// alignment 131 /// @return Relative error threshold for K parameter calculation 132 /// 133 /// For gapless regime calculation accuracy for all parameters is 134 /// always 1e-06 m_LambdaAccuracy and m_KAccuary are not used. GetKAccuracy(void) const135 double GetKAccuracy(void) const {return m_KAccuracy;} 136 137 /// Set gapped/gapless regime 138 /// @param gapped Regime (gapped if true, gapless otherwise) [in] 139 /// 140 /// For gapless regime calculation accuracy for all parameters is 141 /// always 1e-06 m_LambdaAccuracy and m_KAccuary are not used. SetGapped(bool gapped)142 void SetGapped(bool gapped) {m_IsGapped = gapped;} 143 144 /// Get gapped/gapless regime 145 /// @return true if gapped, false otherwise 146 /// GetGapped(void) const147 bool GetGapped(void) const {return m_IsGapped;} 148 149 /// Set score matrix 150 /// @param smatrix Score matrix values [in] 151 /// 152 void SetScoreMatrix(const CGeneralScoreMatrix& smatrix); 153 154 /// Set score matrix 155 /// @param smatrix Score matrix [in] 156 /// 157 void SetScoreMatrix(const CRef<CGeneralScoreMatrix>& smatrix); 158 159 /// Get score matrix 160 /// @return Pointer to score matrix 161 /// GetScoreMatrix(void) const162 const Int4** GetScoreMatrix(void) const 163 {return (*m_ScoreMatrix).GetMatrix();} 164 165 /// Set residue probabilities for sequence 1 166 /// @param probs Residue probabilities [in] 167 /// SetSeq1ResidueProbs(const TFrequencies & probs)168 void SetSeq1ResidueProbs(const TFrequencies& probs) 169 {m_Seq1ResidueProbs = probs;} 170 171 172 /// Get sequence 1 residue probabilities 173 /// @return Residue probabilities for sequence 1 174 /// GetSeq1ResidueProbs(void) const175 const TFrequencies& GetSeq1ResidueProbs(void) const 176 {return m_Seq1ResidueProbs;} 177 178 /// Set residue probabilities for sequence 2 179 /// @param probs Residue probabilities [in] 180 /// SetSeq2ResidueProbs(const TFrequencies & probs)181 void SetSeq2ResidueProbs(const TFrequencies& probs) 182 {m_Seq2ResidueProbs = probs;} 183 184 /// Get sequence 2 residue probabilities 185 /// @return Residue probabilities for sequence 2 186 /// GetSeq2ResidueProbs(void) const187 const TFrequencies& GetSeq2ResidueProbs(void) const 188 {return m_Seq2ResidueProbs;} 189 190 /// Set maximum calculation time allowed 191 /// @param time Maximum calculation time [in] 192 /// 193 /// Since the gapless calculation is also run during the gapped calculation 194 /// (only for a and alpha ungapped parameters) it means that the total 195 /// calculation time is equal to time(gapless calculation)+time(gapped 196 /// calculation). Typically gapless calculation is quick but sometimes may 197 /// take time. The program sets maximum allowed calculation time for gapless 198 /// calculation as 0.5*(total maximum time). If the result is not received 199 /// during this time then the program reports an error (throw an exception). 200 /// After successful ungapped calculation the program calculates during the 201 /// remaining available time and uses it for the gapped calculation. 202 /// If the user requests gapless calculation only (m_IsGapped=false) then 203 /// the program uses total allowed calculation time for the gapless 204 /// calculation. If there is no result during this time then the program 205 /// generates an error. SetMaxCalcTime(double time)206 void SetMaxCalcTime(double time) {m_MaxCalcTime = time;} 207 208 /// Get maximum calculation time allowed 209 /// @return Maximum calculation time 210 /// 211 /// Since the gapless calculation is also run during the gapped calculation 212 /// (only for a and alpha ungapped parameters) it means that the total 213 /// calculation time is equal to time(gapless calculation)+time(gapped 214 /// calculation). Typically gapless calculation is quick but sometimes may 215 /// take time. The program sets maximum allowed calculation time for gapless 216 /// calculation as 0.5*(total maximum time). If the result is not received 217 /// during this time then the program reports an error (throw an exception). 218 /// After successful ungapped calculation the program calculates during the 219 /// remaining available time and uses it for the gapped calculation. 220 /// If the user requests gapless calculation only (m_IsGapped=false) then 221 /// the program uses total allowed calculation time for the gapless 222 /// calculation. If there is no result during this time then the program 223 /// generates an error. GetMaxCalcTime(void) const224 double GetMaxCalcTime(void) const {return m_MaxCalcTime;} 225 226 /// Set maximum memory allowed for computation 227 /// @param Memory limit [in] 228 /// SetMaxCalcMemory(double mem)229 void SetMaxCalcMemory(double mem) {m_MaxCalcMemory = mem;} 230 231 /// Get maximum memory allowed for computation 232 /// @return Memory limit 233 /// GetMaxCalcMemory(void) const234 double GetMaxCalcMemory(void) const {return m_MaxCalcMemory;} 235 236 /// Get number of residues in utilized alphabet 237 /// @return Number of residues 238 /// GetNumResidues(void) const239 Int4 GetNumResidues(void) const {return m_NumResidues;} 240 241 //---- Options Validation ----// 242 243 /// Validate parameter values 244 /// 245 /// Exception is thrown if options do not pass validation. False is 246 /// returned if there are warnings. 247 /// @return True if options passed validation and false if there 248 /// are warnings 249 /// 250 bool Validate(void); 251 252 /// Check whether there are any error/warning messages 253 /// @return True if there are messages, false otherwise 254 /// IsMessage(void) const255 bool IsMessage(void) const {return !m_Messages.empty();} 256 257 /// Get error/warning messages 258 /// 259 /// Warning messages can be generated by the Validate() function if 260 /// parameters are within their domains but their values are non-typical or 261 /// can result in increased inaccuracies 262 /// @return List of error/warning messages 263 /// GetMessages(void) const264 const vector<string>& GetMessages(void) const {return m_Messages;} 265 266 protected: 267 void x_Init(void); 268 269 /// Get socre matrix value for i-th and j-th residues x_GetScore(Uint4 i,Uint4 j) const270 Int4 x_GetScore(Uint4 i, Uint4 j) const 271 {return (*m_ScoreMatrix).GetScore(i, j);} 272 273 private: 274 /// Forbid assignment operator 275 CGumbelParamsOptions& operator=(const CGumbelParamsOptions&); 276 277 278 protected: 279 280 /// Gap opening penalty 281 Int4 m_GapOpening; 282 283 /// Gap extension penalty 284 Int4 m_GapExtension; 285 286 /// Desired accuracy for lambda computation 287 /// only for gapped aligmment 288 double m_LambdaAccuracy; 289 290 /// Desired accuracy for parameter K computation 291 /// only for gapped alignment 292 double m_KAccuracy; 293 294 /// Gapped/gapless regime, true for gapped, false for gapless. 295 /// For gapless regime calculation accuracy for all parameters is 296 /// always 1e-06 m_LambdaAccuracy and m_KAccuary are not used. 297 bool m_IsGapped; 298 299 /// Scoring matrix 300 CConstRef<CGeneralScoreMatrix> m_ScoreMatrix; 301 302 /// Residue frequencies for sequence 1 303 TFrequencies m_Seq1ResidueProbs; 304 305 /// Residue frequencies for sequence 2 306 TFrequencies m_Seq2ResidueProbs; 307 308 /// Number of residues in alphabet 309 Int4 m_NumResidues; 310 311 /// Maximum allowed calculation time 312 double m_MaxCalcTime; 313 314 /// Maximum allowed calculation memory 315 double m_MaxCalcMemory; 316 317 /// Warning messages 318 vector<string> m_Messages; 319 }; 320 321 /// Class used for creation of sets of input parameters 322 /// 323 class CGumbelParamsOptionsFactory 324 { 325 326 public: 327 /// Creates standard options with score matrix and residue frequenceis for 328 /// 20 aa alphabet 329 /// @param smat Score matrix name [in] 330 /// @return Gumbel params options 331 /// 332 static CRef<CGumbelParamsOptions> CreateStandard20AAOptions( 333 CGeneralScoreMatrix::EScoreMatrixName smat 334 = CGeneralScoreMatrix::eBlosum62); 335 336 337 /// Creates standard options with no score matrix or resiudie frequencies 338 /// @return Gumbel params options 339 /// 340 static CRef<CGumbelParamsOptions> CreateBasicOptions(void); 341 }; 342 343 344 /// Gumbel parameters and estimation errors 345 /// 346 typedef struct SGumbelParams 347 { 348 // Gapped parameters 349 double lambda; 350 double K; 351 double C; 352 double sigma; 353 double alpha_i; 354 double alpha_j; 355 double ai; 356 double aj; 357 358 // Estimation errors for gapped parameters 359 double lambda_error; 360 double K_error; 361 double C_error; 362 double sigma_error; 363 double alpha_i_error; 364 double alpha_j_error; 365 double ai_error; 366 double aj_error; 367 368 // Gapless parameters 369 double gapless_alpha; 370 double gapless_a; 371 372 // Estimation errors for gapless parameters 373 double gapless_alpha_error; 374 double gapless_a_error; 375 376 Int4 G; 377 378 } SGumbelParams; 379 380 381 /// Result of Gumbel parameters calculation along with diagnostic info 382 /// 383 class CGumbelParamsResult : public CObject 384 { 385 public: 386 387 struct SSbsArrays { 388 vector<double> lambda_sbs; 389 vector<double> K_sbs; 390 vector<double> C_sbs; 391 vector<double> sigma_sbs; 392 vector<double> alpha_i_sbs; 393 vector<double> alpha_j_sbs; 394 vector<double> ai_sbs; 395 vector<double> aj_sbs; 396 }; 397 398 public: 399 //---- Constructors ----// 400 401 /// Create empty results 402 /// CGumbelParamsResult(void)403 CGumbelParamsResult(void) {} 404 405 406 //---- Methods that get the results ----// 407 408 409 /// Set Gumbel parameters values 410 /// @return Gumbel parameters 411 /// SetGumbelParams(void)412 SGumbelParams& SetGumbelParams(void) {return m_GumbelParams;} 413 414 /// Get Gubmel parameters 415 /// @return Gumbel parameters 416 /// GetGumbelParams(void) const417 const SGumbelParams& GetGumbelParams(void) const {return m_GumbelParams;} 418 419 /// Set Sbs arrays 420 /// @return Sbs arrays 421 /// SetSbsArrays(void)422 SSbsArrays& SetSbsArrays(void) {return m_SbsArrays;} 423 424 /// Get Sbs arrays 425 /// @return Sbs arrays 426 /// GetSbsArrays(void) const427 const SSbsArrays& GetSbsArrays(void) const {return m_SbsArrays;} 428 429 /// Set calculation time 430 /// @param time Calculation time [in] 431 /// SetCalcTime(double time)432 void SetCalcTime(double time) {m_CalcTime = time;} 433 434 /// Get calculation time 435 /// @return calculation time 436 /// GetCalcTime(void) const437 double GetCalcTime(void) const {return m_CalcTime;} 438 439 protected: 440 /// Forbid Copy constructor 441 CGumbelParamsResult(const CGumbelParamsResult& result); 442 443 /// Forbid assignment operator 444 CGumbelParamsResult& operator=(const CGumbelParamsResult&); 445 446 447 protected: 448 449 SGumbelParams m_GumbelParams; 450 SSbsArrays m_SbsArrays; 451 452 double m_CalcTime; 453 }; 454 455 456 /// Wrapper for Gumbel parameter calculation 457 /// 458 class CGumbelParamsCalc : public CObject 459 { 460 public: 461 //---- Constructors ----// 462 463 /// Create calculation object with given options 464 /// @param opts Gumbel params options [in] 465 /// 466 CGumbelParamsCalc(const CRef<CGumbelParamsOptions>& opts); 467 468 469 /// Create calculation object with given options and randomization 470 /// parameters. The constructor fixes randomization parameters and 471 /// makes results predictible. Should be used only for testing. 472 /// @param opts Gumbel params options [in] 473 /// @param rand_opts Gumbel params randomization parameters 474 /// 475 CGumbelParamsCalc(const CRef<CGumbelParamsOptions>& opts, 476 const CRef<CGumbelParamsRandDiagnostics>& rand_opts); 477 478 //---- Options ----// 479 480 /// Set randomization parameters. Fixes randomization parameters and 481 /// makes results predictible. Should be used only for testing. 482 /// @param rand_opts Randomization parameters 483 /// SetRandParams(const CRef<CGumbelParamsRandDiagnostics> & rand_opts)484 void SetRandParams(const CRef<CGumbelParamsRandDiagnostics>& rand_opts) 485 {m_RandParams = rand_opts;} 486 487 488 //---- Calculation ----// 489 490 /// Perform Gumbel parameter calculation 491 /// @return Gumbel parameters 492 /// 493 CRef<CGumbelParamsResult> Run(void); 494 495 496 //---- Getting results ----// 497 498 /// Get randomization parameters 499 /// @return Randomization parameters 500 /// GetRandParams(void)501 CRef<CGumbelParamsRandDiagnostics> GetRandParams(void) {return m_RandParams;} 502 503 /// Get claculation options 504 /// @return Options 505 /// GetOptions(void) const506 CConstRef<CGumbelParamsOptions> GetOptions(void) const {return m_Options;} 507 508 509 510 /// Get computation result 511 /// @return Gumbel parameters and diagnostics 512 /// GetResult(void)513 CRef<CGumbelParamsResult> GetResult(void) {return m_Result;} 514 GetGumbelParams(void) const515 const SGumbelParams& GetGumbelParams(void) const 516 {return m_Result->GetGumbelParams();} 517 518 protected: 519 /// Forbid copy constructor 520 CGumbelParamsCalc(const CGumbelParamsCalc&); 521 522 /// Forbid assignment oprator 523 CGumbelParamsCalc& operator=(const CGumbelParamsCalc&); 524 525 526 protected: 527 CConstRef<CGumbelParamsOptions> m_Options; 528 CRef<CGumbelParamsRandDiagnostics> m_RandParams; 529 530 CRef<CGumbelParamsResult> m_Result; 531 532 public: 533 /// Major version 534 static const int kMajorVersion = 2; 535 /// Minor version 536 static const int kMinorVersion = 0; 537 /// Patch version 538 static const int kPatchVersion = 0; 539 }; 540 541 542 /// Options that control random values used in internal parts of Gumbel 543 /// parameter calculation for gapped aligmment. This class should be used 544 /// only for diagnostics. Supplying this class as argument to 545 /// CGumbelParamsCalc::Run() yileds deterministic results. 546 /// 547 class CGumbelParamsRandDiagnostics : public CObject 548 { 549 public: 550 551 /// Constructor CGumbelParamsRandDiagnostics(void)552 CGumbelParamsRandDiagnostics(void) {} 553 554 /// Set random seed 555 /// @param val Random seed [in] 556 /// SetRandomSeed(Uint4 val)557 void SetRandomSeed(Uint4 val) {m_RandomSeed = val;} 558 559 /// Get random seed 560 /// @return Random seed 561 /// GetRandomSeed(void) const562 Uint4 GetRandomSeed(void) const {return m_RandomSeed;} 563 564 /// Set first stage preliminary realizations numbers 565 /// @return Array of first stage preliminary realizations numbers 566 /// SetFirstStagePrelimReNumbers(void)567 vector<Int4>& SetFirstStagePrelimReNumbers(void) 568 {return m_FirstStagePrelimReNumbers;} 569 570 /// Get first stage preliminary realizations numbers 571 /// @return First stage preliminary realizations numbers 572 /// GetFirstStagePrelimReNumbers(void) const573 const vector<Int4>& GetFirstStagePrelimReNumbers(void) const 574 {return m_FirstStagePrelimReNumbers;} 575 576 /// Set preliminary realizations numbers 577 /// @return Array of preliminary realizations numbers 578 /// SetPrelimReNumbers(void)579 vector<Int4>& SetPrelimReNumbers(void) 580 {return m_PrelimReNumbers;} 581 582 /// Get preliminary realizations numbers 583 /// @return Preliminary realizations numbers 584 /// GetPrelimReNumbers(void) const585 const vector<Int4>& GetPrelimReNumbers(void) const 586 {return m_PrelimReNumbers;} 587 588 /// Set perliminary realizations numbers killing array 589 /// @return Array of preliminary realizations numbers killing 590 /// SetPrelimReNumbersKilling(void)591 vector<Int4>& SetPrelimReNumbersKilling(void) 592 {return m_PrelimReNumbersKilling;} 593 594 /// Get perliminary realizations numbers killing array 595 /// @return Preliminary realizations numbers killing 596 /// GetPrelimReNumbersKilling(void) const597 const vector<Int4>& GetPrelimReNumbersKilling(void) const 598 {return m_PrelimReNumbersKilling;} 599 600 /// Set total realizations number 601 /// @param num Total realizations number [in] 602 /// SetTotalReNumber(Int4 num)603 void SetTotalReNumber(Int4 num) {m_TotalReNumber = num;} 604 605 /// Get total realizations number 606 /// @return Total realizations number 607 /// GetTotalReNumber(void) const608 Int4 GetTotalReNumber(void) const {return m_TotalReNumber;} 609 610 /// Set total realizations number killing 611 /// @param num Total realizations number killing [in] 612 /// SetTotalReNumberKilling(Int4 num)613 void SetTotalReNumberKilling(Int4 num) {m_TotalReNumberKilling = num;} 614 615 /// Get total realizations number killing 616 /// @return Total realizations number killing [in] 617 /// GetTotalReNumberKilling(void) const618 Int4 GetTotalReNumberKilling(void) const {return m_TotalReNumberKilling;} 619 620 621 private: 622 623 /// Random seed 624 Uint4 m_RandomSeed; 625 626 /// Frist stage preliminary realizations numbers ALP 627 vector<Int4> m_FirstStagePrelimReNumbers; 628 629 // Preliminary realizations numbers ALP 630 vector<Int4> m_PrelimReNumbers; 631 632 /// Preliminary realizations numbers killing 633 vector<Int4> m_PrelimReNumbersKilling; 634 635 /// Total realizations number ALP 636 Int4 m_TotalReNumber; 637 638 // Total realizations number killing 639 Int4 m_TotalReNumberKilling; 640 }; 641 642 /// Exception class 643 class CGumbelParamsException : public CException 644 { 645 public: 646 enum EErrCode { 647 eInvalidOptions, 648 eLimitsExceeded, 649 eParamsError, 650 eMemAllocError, 651 eUnexpectedError, 652 eRepeatCalc 653 }; 654 GetErrCodeString(void) const655 virtual const char* GetErrCodeString(void) const override { 656 return "eInvalid"; 657 } 658 659 NCBI_EXCEPTION_DEFAULT(CGumbelParamsException, CException); 660 }; 661 662 END_SCOPE(blast) 663 END_NCBI_SCOPE 664 665 #endif // ALGO_BLAST_GUMBEL_PARAMS__GUMBEL_PARAMS___HPP 666 667 668