1 /*
2 *  $Id: intron.cpp 110120 2007-09-07 16:57:54Z chetvern $
3 *
4 * =========================================================================
5 *
6 *                            PUBLIC DOMAIN NOTICE
7 *               National Center for Biotechnology Information
8 *
9 *  This software/database is a "United States Government Work" under the
10 *  terms of the United States Copyright Act.  It was written as part of
11 *  the author's official duties as a United States Government employee and
12 *  thus cannot be copyrighted.  This software/database is freely available
13 *  to the public for use. The National Library of Medicine and the U.S.
14 *  Government have not placed any restriction on its use or reproduction.
15 *
16 *  Although all reasonable efforts have been taken to ensure the accuracy
17 *  and reliability of the software and data, the NLM and the U.S.
18 *  Government do not and cannt warrant the performance or results that
19 *  may be obtained by using this software or data. The NLM and the U.S.
20 *  Government disclaim all warranties, express or implied, including
21 *  warranties of performance, merchantability or fitness for any particular
22 *  purpose.
23 *
24 *  Please cite the author in any work or product based on this material.
25 *
26 * =========================================================================
27 *
28 *  Author: Boris Kiryutin
29 *
30 * =========================================================================
31 */
32 
33 #include <ncbi_pch.hpp>
34 #include <corelib/ncbistd.hpp>
35 
36 #include <algo/align/prosplign/prosplign_exception.hpp>
37 
38 #include"nucprot.hpp"
39 #include"intron.hpp"
40 
41 BEGIN_NCBI_SCOPE
BEGIN_SCOPE(prosplign)42 BEGIN_SCOPE(prosplign)
43 
44 // int CAnyIntron::ini_nuc_margin = 1;//minimum - 0
45 //                                    // for versions with no end gap cost at least one required.
46 
47 void CAnyIntron::SimpleNucStep(const CProSplignScaledScoring scoring) {
48     j++;
49     swa.first -= scoring.ie;
50     swt.first -= scoring.ie;
51     swg.first -= scoring.ie;
52     swc.first -= scoring.ie;
53     swn.first -= scoring.ie;
54 
55     sea.first -= scoring.ie;
56     set.first -= scoring.ie;
57     seg.first -= scoring.ie;
58     sec.first -= scoring.ie;
59     sen.first -= scoring.ie;
60 
61     sw111.increment(scoring);
62     sfv111.increment(scoring);
63     sw000.increment(scoring);
64     sh000.increment(scoring);
65     sv000.increment(scoring);
66     sfh000.increment(scoring);
67     sfv000.increment(scoring);
68     sw012.increment(scoring);
69     sh012.increment(scoring);
70     sw021.increment(scoring);
71     sh021.increment(scoring);
72 }
73 
AddW1(const CProSplignScaledScoring scoring)74 void CAnyIntron::AddW1(const CProSplignScaledScoring scoring) {
75     int jj = j-scoring.lmin-2;
76     int sco = esc[jj-1];
77             switch (nseq[jj-1]) {
78                 case nA:
79                     if(swa.first<sco) {
80                             swa.first = sco;
81                             swa.second = jj;
82                     }
83                     break;
84                 case nC:
85                     if(swc.first<sco) {
86                             swc.first = sco;
87                             swc.second = jj;
88                     }
89                     break;
90                 case nG:
91                     if(swg.first<sco) {
92                             swg.first = sco;
93                             swg.second = jj;
94                     }
95                     break;
96                 case nT:
97                     if(swt.first<sco) {
98                             swt.first = sco;
99                             swt.second = jj;
100                     }
101                     break;
102                 default:
103                     if(swn.first<sco) {
104                             swn.first = sco;
105                             swn.second = jj;
106                     }
107                     break;
108             }
109 }
110 
AddW2(const CProSplignScaledScoring scoring,const CSubstMatrix & matrix)111 void CAnyIntron::AddW2(const CProSplignScaledScoring scoring, const CSubstMatrix& matrix) {
112             int sco = esc[j-scoring.lmin-3] + matrix.MultScore(nseq[j-scoring.lmin-3], nseq[j-scoring.lmin-2], nA, amin);
113             if(sco > sea.first) {
114                 sea.first = sco;
115                 sea.second = j-scoring.lmin-1;
116             }
117             sco = esc[j-scoring.lmin-3] + matrix.MultScore(nseq[j-scoring.lmin-3], nseq[j-scoring.lmin-2], nT, amin);
118             if(sco > set.first) {
119                 set.first = sco;
120                 set.second = j-scoring.lmin-1;
121             }
122             sco = esc[j-scoring.lmin-3] + matrix.MultScore(nseq[j-scoring.lmin-3], nseq[j-scoring.lmin-2], nG, amin);
123             if(sco > seg.first) {
124                 seg.first = sco;
125                 seg.second = j-scoring.lmin-1;
126             }
127             sco = esc[j-scoring.lmin-3] + matrix.MultScore(nseq[j-scoring.lmin-3], nseq[j-scoring.lmin-2], nC, amin);
128             if(sco > sec.first) {
129                 sec.first = sco;
130                 sec.second = j-scoring.lmin-1;
131             }
132             sco = esc[j-scoring.lmin-3] + matrix.MultScore(nseq[j-scoring.lmin-3], nseq[j-scoring.lmin-2], nN, amin);
133             if(sco > sen.first) {
134                 sen.first = sco;
135                 sen.second = j-scoring.lmin-1;
136             }
137 }
138 
139 
NucStep(const CProSplignScaledScoring scoring,const CSubstMatrix & matrix)140 void CAnyIntron::NucStep(const CProSplignScaledScoring scoring, const CSubstMatrix& matrix)
141 {
142     SimpleNucStep(scoring);
143     if(j - scoring.lmin - 3 >= scoring.ini_nuc_margin) {
144         AddW1(scoring);
145         AddW2(scoring, matrix);
146     }//end j > lmin +3
147     sw111.AddDon(scoring);
148     sfv111.AddDon(scoring);
149     sw000.AddDon(scoring);
150     sh000.AddDon(scoring);
151     sv000.AddDon(scoring);
152     sfh000.AddDon(scoring);
153     sfv000.AddDon(scoring);
154     sw012.AddDon(scoring);
155     sh012.AddDon(scoring);
156     sw021.AddDon(scoring);
157     sh021.AddDon(scoring);
158 
159 }
160 
161 //CFIntron implementation
162 
CFIntron(const CNSeq & nseq,const CProSplignScaledScoring scoring)163 CFIntron::CFIntron(const CNSeq& nseq, const CProSplignScaledScoring scoring) : m_nseq(nseq)
164 {
165     if(scoring.lmin<4) NCBI_THROW(CProSplignException, eParam, "minimum intron length should exceed 3");
166     CFIntornData cd;
167     int jend = nseq.size() + 1;
168     m_data.resize(jend+1);//m_data[0] is a dummy, m_data[1;nseq.size()+1] used
169     for(int j=0; j < jend; ++j) {
170         //setup for w1, w2
171         if(j - scoring.lmin - 3 < 0) {
172             cd.m_dt1 = cd.m_dt2 = eANY;
173             cd.m_at1 = cd.m_at2 = eANYa;
174             cd.m_1nuc = cd.m_2nuc = nN;
175         } else {
176             int ind = j - scoring.lmin - 4;
177             cd.m_1nuc = (Nucleotides)nseq[++ind];
178             char n1 = nseq[++ind];
179             char n2 = nseq[++ind];
180             char n3 = nseq[++ind];
181             cd.m_dt1 = GetDonType(n1, n2);
182             cd.m_dt2 = GetDonType(n2, n3);
183             ind = j - 5;
184             n1 = nseq[++ind];
185             n2 = nseq[++ind];
186             n3 = nseq[++ind];
187             cd.m_2nuc = (Nucleotides)nseq[++ind];
188             cd.m_at1 = GetAccType(n1, n2);
189             cd.m_at2 = GetAccType(n2, n3);
190         }
191         //setup for w
192         if(j - scoring.lmin < 0) {
193             cd.m_dt = eANY;
194             cd.m_at = eANYa;
195         } else {
196             cd.m_dt = GetDonType(nseq[j-scoring.lmin], nseq[j-scoring.lmin+1]);
197             cd.m_at = GetAccType(nseq[j-2], nseq[j-1]);
198         }
199         m_data[j+1] = cd;
200     }
201 }
202 
Reset(void)203 void CFIntronDon::Reset(void)
204 {
205     m_v.first = infinity;
206     m_w.first = infinity;
207     m_h1.first = infinity;
208     m_h2.first = infinity;
209     m_h3.first = infinity;
210     for(int i=0; i<5; ++i) {
211         m_w1[i] = m_w2[i] = infinity;
212     }
213 }
214 
InitRowScores(CAlignRow * row,vector<int> & prevw,int j)215 void CFIntron::InitRowScores(CAlignRow *row, vector<int>& prevw, int j)
216 {
217     m_gt.Reset();
218     m_gc.Reset();
219     m_at.Reset();
220     m_any.Reset();
221     for(int i=0; i<5; ++i) {
222         m_w1s[i] = m_w2s[i] = 0;
223     }
224     m_cd = &m_data[j];
225     m_w = &row->m_w[3+j];
226     m_w12 = &prevw[j];
227     m_v = &row->m_v[j];
228     m_h1 = &row->m_h1[j];
229     m_h2 = &row->m_h2[j];
230     m_h3 = &row->m_h3[j];
231 }
232 
233 
234 END_SCOPE(prosplign)
235 END_NCBI_SCOPE
236