1 #ifndef ALGO___NW_PSSM_ALIGNER__HPP
2 #define ALGO___NW_PSSM_ALIGNER__HPP
3 
4 /* $Id: nw_pssm_aligner.hpp 111145 2007-09-24 15:37:09Z papadopo $
5 * ===========================================================================
6 *
7 *                            PUBLIC DOMAIN NOTICE
8 *               National Center for Biotechnology Information
9 *
10 *  This software/database is a "United States Government Work" under the
11 *  terms of the United States Copyright Act.  It was written as part of
12 *  the author's official duties as a United States Government employee and
13 *  thus cannot be copyrighted.  This software/database is freely available
14 *  to the public for use. The National Library of Medicine and the U.S.
15 *  Government have not placed any restriction on its use or reproduction.
16 *
17 *  Although all reasonable efforts have been taken to ensure the accuracy
18 *  and reliability of the software and data, the NLM and the U.S.
19 *  Government do not and cannot warrant the performance or results that
20 *  may be obtained by using this software or data. The NLM and the U.S.
21 *  Government disclaim all warranties, express or implied, including
22 *  warranties of performance, merchantability or fitness for any particular
23 *  purpose.
24 *
25 *  Please cite the author in any work or product based on this material.
26 *
27 * ===========================================================================
28 *
29 * Author:  Jason Papadopoulos
30 *
31 * File Description:
32 *   CPSSMAligner class definition
33 *
34 *   CPSSMAligner encapsulates a generic global (Needleman-Wunsch)
35 *   alignment algorithm with affine gap penalty model and position-
36 *   specific scoring for one or both input sequences.
37 *
38 */
39 
40 #include <corelib/ncbistd.hpp>
41 #include <corelib/ncbiobj.hpp>
42 #include <corelib/ncbi_limits.hpp>
43 #include <util/tables/raw_scoremat.h>
44 #include <algo/align/nw/nw_aligner.hpp>
45 
46 #include <vector>
47 #include <string>
48 
49 
50 /** @addtogroup AlgoAlignRoot
51  *
52  * @{
53  */
54 
55 
56 BEGIN_NCBI_SCOPE
57 
58 
59 // Needleman Wunsch algorithm with position-specific scoring
60 //
61 
62 class NCBI_XALGOALIGN_EXPORT CPSSMAligner: public CNWAligner
63 {
64 public:
65     // ctors
66     CPSSMAligner();
67 
68     CPSSMAligner(const CNWAligner::TScore** pssm1, size_t len1,
69                  const char* seq2, size_t len2);
70 
71     CPSSMAligner(const double** freq1, size_t len1,
72                  const double** freq2, size_t len2,
73                  const SNCBIPackedScoreMatrix* scoremat,
74                  const int scale = 1);
75 
~CPSSMAligner(void)76     virtual ~CPSSMAligner(void) {}
77 
78     // Compute the alignment
79     virtual CNWAligner::TScore Run(void);
80 
81     // Setters
82     void SetSequences(const char* seq1, size_t len1,
83                       const char* seq2, size_t len2,
84                       bool verify = true);
85 
86     void SetSequences(const CNWAligner::TScore** pssm1, size_t len1,
87                       const char* seq2, size_t len2,
88                       bool verify = true);
89 
90     void SetSequences(const double** freq1, size_t len1,
91                       const double** freq2, size_t len2,
92                       const int scale = 1);
93 
94     void SetScoreMatrix(const SNCBIPackedScoreMatrix* scoremat);
95 
SetFreqScale(const int scale)96     void SetFreqScale(const int scale) {m_FreqScale = scale;}
97 
SetWg(TScore value)98     void SetWg  (TScore value)   // gap opening
99     {
100         m_StartWg = m_Wg  = m_EndWg = value;
101     }
SetWs(TScore value)102     void SetWs  (TScore value)   // gap extension
103     {
104         m_StartWs = m_Ws  = m_EndWs = value;
105     }
SetStartWg(TScore value)106     void SetStartWg(TScore value)  { m_StartWg = value; }   // gap opening
SetStartWs(TScore value)107     void SetStartWs(TScore value)  { m_StartWs = value; }   // gap extension
SetEndWg(TScore value)108     void SetEndWg(TScore value)    { m_EndWg = value; }   // gap opening
SetEndWs(TScore value)109     void SetEndWs(TScore value)    { m_EndWs = value; }   // gap extension
110 
111     // Getters
GetPssm1() const112     const CNWAligner::TScore** GetPssm1() const {return m_Pssm1;}
GetSeq1() const113     const char* GetSeq1() const                 {return m_Seq1;}
GetFreq1() const114     const double** GetFreq1() const             {return m_Freq1;}
GetFreq2() const115     const double** GetFreq2() const             {return m_Freq2;}
GetFreqScale() const116     int GetFreqScale() const                    {return m_FreqScale;}
117 
GetStartWg() const118     TScore GetStartWg() const { return m_StartWg; }
GetStartWs() const119     TScore GetStartWs() const { return m_StartWs; }
GetEndWg() const120     TScore GetEndWg() const   { return m_EndWg; }
GetEndWs() const121     TScore GetEndWs() const   { return m_EndWs; }
GetMatrix()122     SNCBIFullScoreMatrix& GetMatrix() { return m_ScoreMatrix; }
123 
124     virtual TScore ScoreFromTranscript(const TTranscript& transcript,
125                                        size_t start1 = 0,
126                                        size_t start2 = 0) const;
127 
128 protected:
129 
130     // only NCBIstdaa alphabet supported
131     static const int kPSSM_ColumnSize = 28;
132 
133     // Source sequences
134     const TScore** m_Pssm1;
135     const double** m_Freq1;
136 
137     const char*    m_Seq2;
138     const double** m_Freq2;
139 
140     // scale factor for position frequencies
141     int                        m_FreqScale;
142 
143     TScore   m_StartWg;// gap opening penalty for initial gaps
144     TScore   m_StartWs;// gap extension penalty for initial gaps
145     TScore   m_EndWg;  // gap opening penalty for terminal gaps
146     TScore   m_EndWs;  // gap extension penalty for terminal gaps
147 
148     // core dynamic programming
149     virtual TScore x_Align (SAlignInOut* data);
150     TScore x_AlignProfile (SAlignInOut* data);
151     TScore x_AlignPSSM (SAlignInOut* data);
152 
153     // retrieve transcript symbol for a one-character diag
154     virtual ETranscriptSymbol x_GetDiagTS(size_t i1, size_t i2) const;
155 
156     double m_DScoreMatrix[kPSSM_ColumnSize][kPSSM_ColumnSize];
157 };
158 
159 
160 END_NCBI_SCOPE
161 
162 
163 /* @} */
164 
165 #endif  /* ALGO___NW_PSSM_ALIGNER__HPP */
166