1 #ifndef ALGO_BLAST_GUMBEL_PARAMS__GUMBEL_PARAMS___HPP
2 #define ALGO_BLAST_GUMBEL_PARAMS__GUMBEL_PARAMS___HPP
3 
4 /* $Id: gumbel_params.hpp 575325 2018-11-27 18:22:00Z ucko $
5 * ===========================================================================
6 *
7 *                            PUBLIC DOMAIN NOTICE
8 *               National Center for Biotechnology Information
9 *
10 *  This software/database is a "United States Government Work" under the
11 *  terms of the United States Copyright Act.  It was written as part of
12 *  the author's offical duties as a United States Government employee and
13 *  thus cannot be copyrighted.  This software/database is freely available
14 *  to the public for use. The National Library of Medicine and the U.S.
15 *  Government have not placed any restriction on its use or reproduction.
16 *
17 *  Although all reasonable efforts have been taken to ensure the accuracy
18 *  and reliability of the software and data, the NLM and the U.S.
19 *  Government do not and cannot warrant the performance or results that
20 *  may be obtained by using this software or data. The NLM and the U.S.
21 *  Government disclaim all warranties, express or implied, including
22 *  warranties of performance, merchantability or fitness for any particular
23 *  purpose.
24 *
25 *  Please cite the author in any work or product based on this material.
26 *
27 * ===========================================================================*/
28 
29 /*****************************************************************************
30 
31 File name: gumbel_params.hpp
32 
33 Author: Greg Boratyn, Sergey Sheetlin
34 
35 Contents: Wrapper classes for real time Gumbel parameters computing code
36 
37 ******************************************************************************/
38 
39 #include <corelib/ncbiobj.hpp>
40 #include <algo/blast/gumbel_params/general_score_matrix.hpp>
41 #include <vector>
42 
43 
44 BEGIN_NCBI_SCOPE
45 BEGIN_SCOPE(blast)
46 
47 class CGumbelParamsRandDiagnostics;
48 
49 
50 /// Input parameters for Gumbel parameters calculation
51 ///
52 class CGumbelParamsOptions : public CObject
53 {
54 public:
55     typedef vector<double> TFrequencies;
56 
57 public:
58 
59     //---- Constructors ----//
60 
61     /// Create empty options object
62     ///
63     CGumbelParamsOptions(void);
64 
65 
66     /// Create options with given score matrix and residue probabilities
67     /// @param smatrix Score matrix values [in]
68     /// @param seq1_residue_probs Residue probabilities for sequence 1 [in]
69     /// @param seq2_residue_probs Residue probabilities for sequence 2 [in]
70     ///
71     /// The score matrix can be non-symmetric
72     CGumbelParamsOptions(const CGeneralScoreMatrix* smatrix,
73                          const vector<double>& seq1_residue_probs,
74                          const vector<double>& seq2_residue_rpobs);
75 
76     /// Copy constructor
77     /// @param  options Options object [in]
78     ///
79     CGumbelParamsOptions(const CGumbelParamsOptions& options);
80 
81 
82     //---- Getters and Setters ----//
83 
84     /// Set the value of gap opening penalty
85     /// @param g_open Gap opening penalty [in]
86     ///
SetGapOpening(Int4 g_open)87     void SetGapOpening(Int4 g_open) {m_GapOpening = g_open;}
88 
89     /// Get gap opening penalty
90     /// @return Gap opening penalty
91     ///
GetGapOpening(void) const92     Int4 GetGapOpening(void) const {return m_GapOpening;}
93 
94     /// Set gap extention penalty
95     /// @param g_exten Gap extension penalty [in]
96     ///
SetGapExtension(Int4 g_exten)97     void SetGapExtension(Int4 g_exten) {m_GapExtension = g_exten;}
98 
99     /// Get gap extention penalty
100     /// @return Gap extension penalty
101     ///
GetGapExtension(void) const102     Int4 GetGapExtension(void) const {return m_GapExtension;}
103 
104     /// Set relative error threshold for lambda parameter calculation
105     /// for gapped aligmment
106     /// @param lambda_err Relative error threshold [in]
107     ///
108     /// For gapless regime calculation accuracy for all parameters is
109     /// always 1e-06 m_LambdaAccuracy and m_KAccuary are not used.
SetLambdaAccuracy(double lambda_err)110     void SetLambdaAccuracy(double lambda_err)
111     {m_LambdaAccuracy = lambda_err;}
112 
113     /// Get relative error threshold for lambda parameter calculation
114     /// for gapped aligmment
115     /// @return Relative error threshold for lambda calculation
116     ///
117     /// For gapless regime calculation accuracy for all parameters is
118     /// always 1e-06 m_LambdaAccuracy and m_KAccuary are not used.
GetLambdaAccuracy(void) const119     double GetLambdaAccuracy(void) const {return m_LambdaAccuracy;}
120 
121     /// Set relative error threshold for K parameter calculation for gapped
122     /// alignment
123     /// @param k_err Relative error threshold [in]
124     ///
125     /// For gapless regime calculation accuracy for all parameters is
126     /// always 1e-06 m_LambdaAccuracy and m_KAccuary are not used.
SetKAccuracy(double k_err)127     void SetKAccuracy(double k_err) {m_KAccuracy = k_err;}
128 
129     /// Get relative error threshold for K parameter calculation for gapped
130     /// alignment
131     /// @return Relative error threshold for K parameter calculation
132     ///
133     /// For gapless regime calculation accuracy for all parameters is
134     /// always 1e-06 m_LambdaAccuracy and m_KAccuary are not used.
GetKAccuracy(void) const135     double GetKAccuracy(void) const {return m_KAccuracy;}
136 
137     /// Set gapped/gapless regime
138     /// @param gapped Regime (gapped if true, gapless otherwise) [in]
139     ///
140     /// For gapless regime calculation accuracy for all parameters is
141     /// always 1e-06 m_LambdaAccuracy and m_KAccuary are not used.
SetGapped(bool gapped)142     void SetGapped(bool gapped) {m_IsGapped = gapped;}
143 
144     /// Get gapped/gapless regime
145     /// @return true if gapped, false otherwise
146     ///
GetGapped(void) const147     bool GetGapped(void) const {return m_IsGapped;}
148 
149     /// Set score matrix
150     /// @param smatrix Score matrix values [in]
151     ///
152     void SetScoreMatrix(const CGeneralScoreMatrix& smatrix);
153 
154     /// Set score matrix
155     /// @param smatrix Score matrix [in]
156     ///
157     void SetScoreMatrix(const CRef<CGeneralScoreMatrix>& smatrix);
158 
159     /// Get score matrix
160     /// @return Pointer to score matrix
161     ///
GetScoreMatrix(void) const162     const Int4** GetScoreMatrix(void) const
163     {return (*m_ScoreMatrix).GetMatrix();}
164 
165     /// Set residue probabilities for sequence 1
166     /// @param probs Residue probabilities [in]
167     ///
SetSeq1ResidueProbs(const TFrequencies & probs)168     void SetSeq1ResidueProbs(const TFrequencies& probs)
169     {m_Seq1ResidueProbs = probs;}
170 
171 
172     /// Get sequence 1 residue probabilities
173     /// @return Residue probabilities for sequence 1
174     ///
GetSeq1ResidueProbs(void) const175     const TFrequencies& GetSeq1ResidueProbs(void) const
176     {return m_Seq1ResidueProbs;}
177 
178     /// Set residue probabilities for sequence 2
179     /// @param probs Residue probabilities [in]
180     ///
SetSeq2ResidueProbs(const TFrequencies & probs)181     void SetSeq2ResidueProbs(const TFrequencies& probs)
182     {m_Seq2ResidueProbs = probs;}
183 
184     /// Get sequence 2 residue probabilities
185     /// @return Residue probabilities for sequence 2
186     ///
GetSeq2ResidueProbs(void) const187     const TFrequencies& GetSeq2ResidueProbs(void) const
188     {return m_Seq2ResidueProbs;}
189 
190     /// Set maximum calculation time allowed
191     /// @param time Maximum calculation time [in]
192     ///
193     /// Since the gapless calculation is also run during the gapped calculation
194     /// (only for a and alpha ungapped parameters) it means that the total
195     /// calculation time is equal to time(gapless calculation)+time(gapped
196     /// calculation). Typically gapless calculation is quick but sometimes may
197     /// take time. The program sets maximum allowed calculation time for gapless
198     /// calculation as 0.5*(total maximum time). If the result is not received
199     /// during this time then the program reports an error (throw an exception).
200     /// After successful ungapped calculation the program calculates during the
201     /// remaining available time and uses it for the gapped calculation.
202     /// If the user requests gapless calculation only (m_IsGapped=false) then
203     /// the program uses total allowed calculation time for the gapless
204     /// calculation. If there is no result during this time then the program
205     /// generates an error.
SetMaxCalcTime(double time)206     void SetMaxCalcTime(double time) {m_MaxCalcTime = time;}
207 
208     /// Get maximum calculation time allowed
209     /// @return Maximum calculation time
210     ///
211     /// Since the gapless calculation is also run during the gapped calculation
212     /// (only for a and alpha ungapped parameters) it means that the total
213     /// calculation time is equal to time(gapless calculation)+time(gapped
214     /// calculation). Typically gapless calculation is quick but sometimes may
215     /// take time. The program sets maximum allowed calculation time for gapless
216     /// calculation as 0.5*(total maximum time). If the result is not received
217     /// during this time then the program reports an error (throw an exception).
218     /// After successful ungapped calculation the program calculates during the
219     /// remaining available time and uses it for the gapped calculation.
220     /// If the user requests gapless calculation only (m_IsGapped=false) then
221     /// the program uses total allowed calculation time for the gapless
222     /// calculation. If there is no result during this time then the program
223     /// generates an error.
GetMaxCalcTime(void) const224     double GetMaxCalcTime(void) const {return m_MaxCalcTime;}
225 
226     /// Set maximum memory allowed for computation
227     /// @param Memory limit [in]
228     ///
SetMaxCalcMemory(double mem)229     void SetMaxCalcMemory(double mem) {m_MaxCalcMemory = mem;}
230 
231     /// Get maximum memory allowed for computation
232     /// @return Memory limit
233     ///
GetMaxCalcMemory(void) const234     double GetMaxCalcMemory(void) const {return m_MaxCalcMemory;}
235 
236     /// Get number of residues in utilized alphabet
237     /// @return Number of residues
238     ///
GetNumResidues(void) const239     Int4 GetNumResidues(void) const {return m_NumResidues;}
240 
241     //---- Options Validation ----//
242 
243     /// Validate parameter values
244     ///
245     /// Exception is thrown if options do not pass validation. False is
246     /// returned if there are warnings.
247     /// @return True if options passed validation and false if there
248     /// are warnings
249     ///
250     bool Validate(void);
251 
252     /// Check whether there are any error/warning messages
253     /// @return True if there are messages, false otherwise
254     ///
IsMessage(void) const255     bool IsMessage(void) const {return !m_Messages.empty();}
256 
257     /// Get error/warning messages
258     ///
259     /// Warning messages can be generated by the Validate() function if
260     /// parameters are within their domains but their values are non-typical or
261     /// can result in increased inaccuracies
262     /// @return List of error/warning messages
263     ///
GetMessages(void) const264     const vector<string>& GetMessages(void) const {return m_Messages;}
265 
266 protected:
267     void x_Init(void);
268 
269     /// Get socre matrix value for i-th and j-th residues
x_GetScore(Uint4 i,Uint4 j) const270     Int4 x_GetScore(Uint4 i, Uint4 j) const
271     {return (*m_ScoreMatrix).GetScore(i, j);}
272 
273 private:
274     /// Forbid assignment operator
275     CGumbelParamsOptions& operator=(const CGumbelParamsOptions&);
276 
277 
278 protected:
279 
280     /// Gap opening penalty
281     Int4 m_GapOpening;
282 
283     /// Gap extension penalty
284     Int4 m_GapExtension;
285 
286     /// Desired accuracy for lambda computation
287     /// only for gapped aligmment
288     double m_LambdaAccuracy;
289 
290     /// Desired accuracy for parameter K computation
291     /// only for gapped alignment
292     double m_KAccuracy;
293 
294     /// Gapped/gapless regime, true for gapped, false for gapless.
295     /// For gapless regime calculation accuracy for all parameters is
296     /// always 1e-06 m_LambdaAccuracy and m_KAccuary are not used.
297     bool m_IsGapped;
298 
299     /// Scoring matrix
300     CConstRef<CGeneralScoreMatrix> m_ScoreMatrix;
301 
302     /// Residue frequencies for sequence 1
303     TFrequencies m_Seq1ResidueProbs;
304 
305     /// Residue frequencies for sequence 2
306     TFrequencies m_Seq2ResidueProbs;
307 
308     /// Number of residues in alphabet
309     Int4 m_NumResidues;
310 
311     /// Maximum allowed calculation time
312     double m_MaxCalcTime;
313 
314     /// Maximum allowed calculation memory
315     double m_MaxCalcMemory;
316 
317     /// Warning messages
318     vector<string> m_Messages;
319 };
320 
321 /// Class used for creation of sets of input parameters
322 ///
323 class CGumbelParamsOptionsFactory
324 {
325 
326 public:
327     /// Creates standard options with score matrix and residue frequenceis for
328     /// 20 aa alphabet
329     /// @param smat Score matrix name [in]
330     /// @return Gumbel params options
331     ///
332     static CRef<CGumbelParamsOptions> CreateStandard20AAOptions(
333                                     CGeneralScoreMatrix::EScoreMatrixName smat
334                                     = CGeneralScoreMatrix::eBlosum62);
335 
336 
337     /// Creates standard options with no score matrix or resiudie frequencies
338     /// @return Gumbel params options
339     ///
340     static CRef<CGumbelParamsOptions> CreateBasicOptions(void);
341 };
342 
343 
344 /// Gumbel parameters and estimation errors
345 ///
346 typedef struct SGumbelParams
347 {
348     // Gapped parameters
349     double lambda;
350     double K;
351     double C;
352     double sigma;
353     double alpha_i;
354     double alpha_j;
355     double ai;
356     double aj;
357 
358     // Estimation errors for gapped parameters
359     double lambda_error;
360     double K_error;
361     double C_error;
362     double sigma_error;
363     double alpha_i_error;
364     double alpha_j_error;
365     double ai_error;
366     double aj_error;
367 
368     // Gapless parameters
369     double gapless_alpha;
370     double gapless_a;
371 
372     // Estimation errors for gapless parameters
373     double gapless_alpha_error;
374     double gapless_a_error;
375 
376     Int4 G;
377 
378 } SGumbelParams;
379 
380 
381 /// Result of Gumbel parameters calculation along with diagnostic info
382 ///
383 class CGumbelParamsResult : public CObject
384 {
385 public:
386 
387     struct SSbsArrays {
388         vector<double> lambda_sbs;
389         vector<double> K_sbs;
390         vector<double> C_sbs;
391         vector<double> sigma_sbs;
392         vector<double> alpha_i_sbs;
393         vector<double> alpha_j_sbs;
394         vector<double> ai_sbs;
395         vector<double> aj_sbs;
396     };
397 
398 public:
399     //---- Constructors ----//
400 
401     /// Create empty results
402     ///
CGumbelParamsResult(void)403     CGumbelParamsResult(void) {}
404 
405 
406     //---- Methods that get the results ----//
407 
408 
409     /// Set Gumbel parameters values
410     /// @return Gumbel parameters
411     ///
SetGumbelParams(void)412     SGumbelParams& SetGumbelParams(void) {return m_GumbelParams;}
413 
414     /// Get Gubmel parameters
415     /// @return Gumbel parameters
416     ///
GetGumbelParams(void) const417     const SGumbelParams& GetGumbelParams(void) const {return m_GumbelParams;}
418 
419     /// Set Sbs arrays
420     /// @return Sbs arrays
421     ///
SetSbsArrays(void)422     SSbsArrays& SetSbsArrays(void) {return m_SbsArrays;}
423 
424     /// Get Sbs arrays
425     /// @return Sbs arrays
426     ///
GetSbsArrays(void) const427     const SSbsArrays& GetSbsArrays(void) const {return m_SbsArrays;}
428 
429     /// Set calculation time
430     /// @param time Calculation time [in]
431     ///
SetCalcTime(double time)432     void SetCalcTime(double time) {m_CalcTime = time;}
433 
434     /// Get calculation time
435     /// @return calculation time
436     ///
GetCalcTime(void) const437     double GetCalcTime(void) const {return m_CalcTime;}
438 
439 protected:
440     /// Forbid Copy constructor
441     CGumbelParamsResult(const CGumbelParamsResult& result);
442 
443     /// Forbid assignment operator
444     CGumbelParamsResult& operator=(const CGumbelParamsResult&);
445 
446 
447 protected:
448 
449     SGumbelParams m_GumbelParams;
450     SSbsArrays m_SbsArrays;
451 
452     double m_CalcTime;
453 };
454 
455 
456 /// Wrapper for Gumbel parameter calculation
457 ///
458 class CGumbelParamsCalc : public CObject
459 {
460 public:
461     //---- Constructors ----//
462 
463     /// Create calculation object with given options
464     /// @param opts Gumbel params options [in]
465     ///
466     CGumbelParamsCalc(const CRef<CGumbelParamsOptions>& opts);
467 
468 
469     /// Create calculation object with given options and randomization
470     /// parameters. The constructor fixes randomization parameters and
471     /// makes results predictible. Should be used only for testing.
472     /// @param opts Gumbel params options [in]
473     /// @param rand_opts Gumbel params randomization parameters
474     ///
475     CGumbelParamsCalc(const CRef<CGumbelParamsOptions>& opts,
476                       const CRef<CGumbelParamsRandDiagnostics>& rand_opts);
477 
478     //---- Options ----//
479 
480     /// Set randomization parameters. Fixes randomization parameters and
481     /// makes results predictible. Should be used only for testing.
482     /// @param rand_opts Randomization parameters
483     ///
SetRandParams(const CRef<CGumbelParamsRandDiagnostics> & rand_opts)484     void SetRandParams(const CRef<CGumbelParamsRandDiagnostics>& rand_opts)
485     {m_RandParams = rand_opts;}
486 
487 
488     //---- Calculation ----//
489 
490     /// Perform Gumbel parameter calculation
491     /// @return Gumbel parameters
492     ///
493     CRef<CGumbelParamsResult> Run(void);
494 
495 
496     //---- Getting results ----//
497 
498     /// Get randomization parameters
499     /// @return Randomization parameters
500     ///
GetRandParams(void)501     CRef<CGumbelParamsRandDiagnostics> GetRandParams(void) {return m_RandParams;}
502 
503     /// Get claculation options
504     /// @return Options
505     ///
GetOptions(void) const506     CConstRef<CGumbelParamsOptions> GetOptions(void) const {return m_Options;}
507 
508 
509 
510     /// Get computation result
511     /// @return Gumbel parameters and diagnostics
512     ///
GetResult(void)513     CRef<CGumbelParamsResult> GetResult(void) {return m_Result;}
514 
GetGumbelParams(void) const515     const SGumbelParams& GetGumbelParams(void) const
516     {return m_Result->GetGumbelParams();}
517 
518 protected:
519     /// Forbid copy constructor
520     CGumbelParamsCalc(const CGumbelParamsCalc&);
521 
522     /// Forbid assignment oprator
523     CGumbelParamsCalc& operator=(const CGumbelParamsCalc&);
524 
525 
526 protected:
527     CConstRef<CGumbelParamsOptions> m_Options;
528     CRef<CGumbelParamsRandDiagnostics> m_RandParams;
529 
530     CRef<CGumbelParamsResult> m_Result;
531 
532 public:
533     /// Major version
534     static const int kMajorVersion = 2;
535     /// Minor version
536     static const int kMinorVersion = 0;
537     /// Patch version
538     static const int kPatchVersion = 0;
539 };
540 
541 
542 /// Options that control random values used in internal parts of Gumbel
543 /// parameter calculation for gapped aligmment. This class should be used
544 /// only for diagnostics. Supplying this class as argument to
545 /// CGumbelParamsCalc::Run() yileds deterministic results.
546 ///
547 class CGumbelParamsRandDiagnostics : public CObject
548 {
549 public:
550 
551     /// Constructor
CGumbelParamsRandDiagnostics(void)552     CGumbelParamsRandDiagnostics(void) {}
553 
554     /// Set random seed
555     /// @param val Random seed [in]
556     ///
SetRandomSeed(Uint4 val)557     void SetRandomSeed(Uint4 val) {m_RandomSeed = val;}
558 
559     /// Get random seed
560     /// @return Random seed
561     ///
GetRandomSeed(void) const562     Uint4 GetRandomSeed(void) const {return m_RandomSeed;}
563 
564     /// Set first stage preliminary realizations numbers
565     /// @return Array of first stage preliminary realizations numbers
566     ///
SetFirstStagePrelimReNumbers(void)567     vector<Int4>& SetFirstStagePrelimReNumbers(void)
568     {return m_FirstStagePrelimReNumbers;}
569 
570     /// Get first stage preliminary realizations numbers
571     /// @return First stage preliminary realizations numbers
572     ///
GetFirstStagePrelimReNumbers(void) const573     const vector<Int4>& GetFirstStagePrelimReNumbers(void) const
574     {return m_FirstStagePrelimReNumbers;}
575 
576     /// Set preliminary realizations numbers
577     /// @return Array of preliminary realizations numbers
578     ///
SetPrelimReNumbers(void)579     vector<Int4>& SetPrelimReNumbers(void)
580     {return m_PrelimReNumbers;}
581 
582     /// Get preliminary realizations numbers
583     /// @return Preliminary realizations numbers
584     ///
GetPrelimReNumbers(void) const585     const vector<Int4>& GetPrelimReNumbers(void) const
586     {return m_PrelimReNumbers;}
587 
588     /// Set perliminary realizations numbers killing array
589     /// @return Array of preliminary realizations numbers killing
590     ///
SetPrelimReNumbersKilling(void)591     vector<Int4>& SetPrelimReNumbersKilling(void)
592     {return m_PrelimReNumbersKilling;}
593 
594     /// Get perliminary realizations numbers killing array
595     /// @return Preliminary realizations numbers killing
596     ///
GetPrelimReNumbersKilling(void) const597     const vector<Int4>& GetPrelimReNumbersKilling(void) const
598     {return m_PrelimReNumbersKilling;}
599 
600     /// Set total realizations number
601     /// @param num Total realizations number [in]
602     ///
SetTotalReNumber(Int4 num)603     void SetTotalReNumber(Int4 num) {m_TotalReNumber = num;}
604 
605     /// Get total realizations number
606     /// @return Total realizations number
607     ///
GetTotalReNumber(void) const608     Int4 GetTotalReNumber(void) const {return m_TotalReNumber;}
609 
610     /// Set total realizations number killing
611     /// @param num Total realizations number killing [in]
612     ///
SetTotalReNumberKilling(Int4 num)613     void SetTotalReNumberKilling(Int4 num) {m_TotalReNumberKilling = num;}
614 
615     /// Get total realizations number killing
616     /// @return Total realizations number killing [in]
617     ///
GetTotalReNumberKilling(void) const618     Int4 GetTotalReNumberKilling(void) const {return m_TotalReNumberKilling;}
619 
620 
621 private:
622 
623     /// Random seed
624     Uint4 m_RandomSeed;
625 
626     /// Frist stage preliminary realizations numbers ALP
627     vector<Int4> m_FirstStagePrelimReNumbers;
628 
629     // Preliminary realizations numbers ALP
630     vector<Int4> m_PrelimReNumbers;
631 
632     /// Preliminary realizations numbers killing
633     vector<Int4> m_PrelimReNumbersKilling;
634 
635     /// Total realizations number ALP
636     Int4 m_TotalReNumber;
637 
638     // Total realizations number killing
639     Int4 m_TotalReNumberKilling;
640 };
641 
642 /// Exception class
643 class CGumbelParamsException : public CException
644 {
645 public:
646     enum EErrCode {
647         eInvalidOptions,
648         eLimitsExceeded,
649         eParamsError,
650         eMemAllocError,
651         eUnexpectedError,
652         eRepeatCalc
653     };
654 
GetErrCodeString(void) const655     virtual const char* GetErrCodeString(void) const override {
656         return "eInvalid";
657     }
658 
659     NCBI_EXCEPTION_DEFAULT(CGumbelParamsException, CException);
660 };
661 
662 END_SCOPE(blast)
663 END_NCBI_SCOPE
664 
665 #endif // ALGO_BLAST_GUMBEL_PARAMS__GUMBEL_PARAMS___HPP
666 
667 
668