1 /*
2 * $Id: intron.cpp 110120 2007-09-07 16:57:54Z chetvern $
3 *
4 * =========================================================================
5 *
6 * PUBLIC DOMAIN NOTICE
7 * National Center for Biotechnology Information
8 *
9 * This software/database is a "United States Government Work" under the
10 * terms of the United States Copyright Act. It was written as part of
11 * the author's official duties as a United States Government employee and
12 * thus cannot be copyrighted. This software/database is freely available
13 * to the public for use. The National Library of Medicine and the U.S.
14 * Government have not placed any restriction on its use or reproduction.
15 *
16 * Although all reasonable efforts have been taken to ensure the accuracy
17 * and reliability of the software and data, the NLM and the U.S.
18 * Government do not and cannt warrant the performance or results that
19 * may be obtained by using this software or data. The NLM and the U.S.
20 * Government disclaim all warranties, express or implied, including
21 * warranties of performance, merchantability or fitness for any particular
22 * purpose.
23 *
24 * Please cite the author in any work or product based on this material.
25 *
26 * =========================================================================
27 *
28 * Author: Boris Kiryutin
29 *
30 * =========================================================================
31 */
32
33 #include <ncbi_pch.hpp>
34 #include <corelib/ncbistd.hpp>
35
36 #include <algo/align/prosplign/prosplign_exception.hpp>
37
38 #include"nucprot.hpp"
39 #include"intron.hpp"
40
41 BEGIN_NCBI_SCOPE
BEGIN_SCOPE(prosplign)42 BEGIN_SCOPE(prosplign)
43
44 // int CAnyIntron::ini_nuc_margin = 1;//minimum - 0
45 // // for versions with no end gap cost at least one required.
46
47 void CAnyIntron::SimpleNucStep(const CProSplignScaledScoring scoring) {
48 j++;
49 swa.first -= scoring.ie;
50 swt.first -= scoring.ie;
51 swg.first -= scoring.ie;
52 swc.first -= scoring.ie;
53 swn.first -= scoring.ie;
54
55 sea.first -= scoring.ie;
56 set.first -= scoring.ie;
57 seg.first -= scoring.ie;
58 sec.first -= scoring.ie;
59 sen.first -= scoring.ie;
60
61 sw111.increment(scoring);
62 sfv111.increment(scoring);
63 sw000.increment(scoring);
64 sh000.increment(scoring);
65 sv000.increment(scoring);
66 sfh000.increment(scoring);
67 sfv000.increment(scoring);
68 sw012.increment(scoring);
69 sh012.increment(scoring);
70 sw021.increment(scoring);
71 sh021.increment(scoring);
72 }
73
AddW1(const CProSplignScaledScoring scoring)74 void CAnyIntron::AddW1(const CProSplignScaledScoring scoring) {
75 int jj = j-scoring.lmin-2;
76 int sco = esc[jj-1];
77 switch (nseq[jj-1]) {
78 case nA:
79 if(swa.first<sco) {
80 swa.first = sco;
81 swa.second = jj;
82 }
83 break;
84 case nC:
85 if(swc.first<sco) {
86 swc.first = sco;
87 swc.second = jj;
88 }
89 break;
90 case nG:
91 if(swg.first<sco) {
92 swg.first = sco;
93 swg.second = jj;
94 }
95 break;
96 case nT:
97 if(swt.first<sco) {
98 swt.first = sco;
99 swt.second = jj;
100 }
101 break;
102 default:
103 if(swn.first<sco) {
104 swn.first = sco;
105 swn.second = jj;
106 }
107 break;
108 }
109 }
110
AddW2(const CProSplignScaledScoring scoring,const CSubstMatrix & matrix)111 void CAnyIntron::AddW2(const CProSplignScaledScoring scoring, const CSubstMatrix& matrix) {
112 int sco = esc[j-scoring.lmin-3] + matrix.MultScore(nseq[j-scoring.lmin-3], nseq[j-scoring.lmin-2], nA, amin);
113 if(sco > sea.first) {
114 sea.first = sco;
115 sea.second = j-scoring.lmin-1;
116 }
117 sco = esc[j-scoring.lmin-3] + matrix.MultScore(nseq[j-scoring.lmin-3], nseq[j-scoring.lmin-2], nT, amin);
118 if(sco > set.first) {
119 set.first = sco;
120 set.second = j-scoring.lmin-1;
121 }
122 sco = esc[j-scoring.lmin-3] + matrix.MultScore(nseq[j-scoring.lmin-3], nseq[j-scoring.lmin-2], nG, amin);
123 if(sco > seg.first) {
124 seg.first = sco;
125 seg.second = j-scoring.lmin-1;
126 }
127 sco = esc[j-scoring.lmin-3] + matrix.MultScore(nseq[j-scoring.lmin-3], nseq[j-scoring.lmin-2], nC, amin);
128 if(sco > sec.first) {
129 sec.first = sco;
130 sec.second = j-scoring.lmin-1;
131 }
132 sco = esc[j-scoring.lmin-3] + matrix.MultScore(nseq[j-scoring.lmin-3], nseq[j-scoring.lmin-2], nN, amin);
133 if(sco > sen.first) {
134 sen.first = sco;
135 sen.second = j-scoring.lmin-1;
136 }
137 }
138
139
NucStep(const CProSplignScaledScoring scoring,const CSubstMatrix & matrix)140 void CAnyIntron::NucStep(const CProSplignScaledScoring scoring, const CSubstMatrix& matrix)
141 {
142 SimpleNucStep(scoring);
143 if(j - scoring.lmin - 3 >= scoring.ini_nuc_margin) {
144 AddW1(scoring);
145 AddW2(scoring, matrix);
146 }//end j > lmin +3
147 sw111.AddDon(scoring);
148 sfv111.AddDon(scoring);
149 sw000.AddDon(scoring);
150 sh000.AddDon(scoring);
151 sv000.AddDon(scoring);
152 sfh000.AddDon(scoring);
153 sfv000.AddDon(scoring);
154 sw012.AddDon(scoring);
155 sh012.AddDon(scoring);
156 sw021.AddDon(scoring);
157 sh021.AddDon(scoring);
158
159 }
160
161 //CFIntron implementation
162
CFIntron(const CNSeq & nseq,const CProSplignScaledScoring scoring)163 CFIntron::CFIntron(const CNSeq& nseq, const CProSplignScaledScoring scoring) : m_nseq(nseq)
164 {
165 if(scoring.lmin<4) NCBI_THROW(CProSplignException, eParam, "minimum intron length should exceed 3");
166 CFIntornData cd;
167 int jend = nseq.size() + 1;
168 m_data.resize(jend+1);//m_data[0] is a dummy, m_data[1;nseq.size()+1] used
169 for(int j=0; j < jend; ++j) {
170 //setup for w1, w2
171 if(j - scoring.lmin - 3 < 0) {
172 cd.m_dt1 = cd.m_dt2 = eANY;
173 cd.m_at1 = cd.m_at2 = eANYa;
174 cd.m_1nuc = cd.m_2nuc = nN;
175 } else {
176 int ind = j - scoring.lmin - 4;
177 cd.m_1nuc = (Nucleotides)nseq[++ind];
178 char n1 = nseq[++ind];
179 char n2 = nseq[++ind];
180 char n3 = nseq[++ind];
181 cd.m_dt1 = GetDonType(n1, n2);
182 cd.m_dt2 = GetDonType(n2, n3);
183 ind = j - 5;
184 n1 = nseq[++ind];
185 n2 = nseq[++ind];
186 n3 = nseq[++ind];
187 cd.m_2nuc = (Nucleotides)nseq[++ind];
188 cd.m_at1 = GetAccType(n1, n2);
189 cd.m_at2 = GetAccType(n2, n3);
190 }
191 //setup for w
192 if(j - scoring.lmin < 0) {
193 cd.m_dt = eANY;
194 cd.m_at = eANYa;
195 } else {
196 cd.m_dt = GetDonType(nseq[j-scoring.lmin], nseq[j-scoring.lmin+1]);
197 cd.m_at = GetAccType(nseq[j-2], nseq[j-1]);
198 }
199 m_data[j+1] = cd;
200 }
201 }
202
Reset(void)203 void CFIntronDon::Reset(void)
204 {
205 m_v.first = infinity;
206 m_w.first = infinity;
207 m_h1.first = infinity;
208 m_h2.first = infinity;
209 m_h3.first = infinity;
210 for(int i=0; i<5; ++i) {
211 m_w1[i] = m_w2[i] = infinity;
212 }
213 }
214
InitRowScores(CAlignRow * row,vector<int> & prevw,int j)215 void CFIntron::InitRowScores(CAlignRow *row, vector<int>& prevw, int j)
216 {
217 m_gt.Reset();
218 m_gc.Reset();
219 m_at.Reset();
220 m_any.Reset();
221 for(int i=0; i<5; ++i) {
222 m_w1s[i] = m_w2s[i] = 0;
223 }
224 m_cd = &m_data[j];
225 m_w = &row->m_w[3+j];
226 m_w12 = &prevw[j];
227 m_v = &row->m_v[j];
228 m_h1 = &row->m_h1[j];
229 m_h2 = &row->m_h2[j];
230 m_h3 = &row->m_h3[j];
231 }
232
233
234 END_SCOPE(prosplign)
235 END_NCBI_SCOPE
236