1 static char const rcsid[] = "$Id: bandalg1.c,v 6.3 2003/05/30 17:25:35 coulouri Exp $";
2
3 /* ===========================================================================
4 *
5 * PUBLIC DOMAIN NOTICE
6 * National Center for Biotechnology Information
7 *
8 * This software/database is a "United States Government Work" under the
9 * terms of the United States Copyright Act. It was written as part of
10 * the author's official duties as a United States Government employee and
11 * thus cannot be copyrighted. This software/database is freely available
12 * to the public for use. The National Library of Medicine and the U.S.
13 * Government have not placed any restriction on its use or reproduction.
14 *
15 * Although all reasonable efforts have been taken to ensure the accuracy
16 * and reliability of the software and data, the NLM and the U.S.
17 * Government do not and cannot warrant the performance or results that
18 * may be obtained by using this software or data. The NLM and the U.S.
19 * Government disclaim all warranties, express or implied, including
20 * warranties of performance, merchantability or fitness for any particular
21 * purpose.
22 *
23 * Please cite the author in any work or product based on this material.
24 *
25 * ===========================================================================*/
26 /*****************************************************************************
27
28 File name: bandalgn1.c
29
30 Author: Gennadiy Savchuk, Jinqhui Zhang, Tom Madden
31
32 Contents: Functions to perform global quadratic banded alignments.
33
34 *****************************************************************************/
35
36 #include <bandalgn.h>
37
38 static Int4 g_band1_align(Uint1Ptr A, Uint1Ptr B,
39 Int4 M, Int4 N,
40 Int4 low, Int4 up,
41 data_t *data);
42
43 static Int4 g_band1_CHECK_SCORE(Uint1Ptr A, Uint1Ptr B,
44 Int4 M, Int4 N,
45 Int4 PNTR S, data_t *data);
46
gband_quadratic(Uint1Ptr A,Uint1Ptr B,Int4 M,Int4 N,Int4Ptr PNTR matrix,PSUGapOptionsPtr option,Int4Ptr S,Int4Ptr Slen)47 Int4 LIBCALL gband_quadratic(Uint1Ptr A, Uint1Ptr B,
48 Int4 M, Int4 N,
49 Int4Ptr PNTR matrix,
50 PSUGapOptionsPtr option,
51 Int4Ptr S, Int4Ptr Slen)
52 {
53 data_t data;
54 Int4 c, i, j;
55 Int4 low, up;
56 Int4 band;
57 Int4 score;
58
59 /* Setup global parameters */
60 data.g = option->gopen;
61 data.zzh = option->gext;
62 data.w = matrix;
63 data.m = data.g + data.zzh;
64
65 data.sapp = S;
66 data.last = 0;
67 *Slen = 0;
68
69 low = option->start_diag;
70 band = option->width;
71 up = band + low - 1;
72
73 low = MIN(MAX(-M, low),MIN(N - M, 0));
74 up = MAX(MIN(N, up),MAX(N - M, 0));
75
76 if (up < 0 || low > 0) {
77 ErrPostEx(SEV_WARNING, 0, 0,
78 "The band does not include (0,0) grid point");
79 return 0;
80 }
81 if (up+M < N || low+M > N) {
82 ErrPostEx(SEV_WARNING, 0, 0,
83 "The band does not include lower corner");
84 return 0;
85 }
86
87 if (N <= 0) {
88 if (M > 0) DEL_(M);
89 return -gap_(M);
90 }
91 if (M <= 0) {
92 INS_(N);
93 return -gap_(N);
94 }
95 if ((band = up-low+1) <= 1) {
96 c = 0;
97 for (i = 1; i <= M; i++) {
98 REP_;
99 c += data.w[A[i]][B[i]];
100 }
101 return c;
102 }
103
104 j = (band+2) * sizeof(Int4);
105
106 data.CD = (dp_ptr) MemGet(sizeof(dp_node)*(band+2), MGET_ERRPOST);
107
108 c = g_band1_align(A, B, M, N, low, up, &data);
109
110 score = g_band1_CHECK_SCORE(A, B, M, N, S, &data);
111
112 if (score != c) {
113 ErrPostEx(SEV_WARNING, 0, 0, "Check_Score = %ld align_score = %ld ",
114 (long) score, (long) c);
115 return 0;
116 }
117 MemFree(data.CD);
118
119 *Slen = data.sapp - S;
120
121 return c;
122 }
123
124 /* g_band1_CHECK_SCORE - return the score of the alignment stored in S */
125
g_band1_CHECK_SCORE(Uint1Ptr A,Uint1Ptr B,Int4 M,Int4 N,Int4 PNTR S,data_t * data)126 static Int4 g_band1_CHECK_SCORE(Uint1Ptr A, Uint1Ptr B,
127 Int4 M, Int4 N, Int4 PNTR S, data_t *data)
128 {
129 register Int4 i, j, op;
130 Int4 score;
131
132 score = i = j = op = 0;
133 while (i < M || j < N) {
134 op = *S++;
135 if (op == 0)
136 score = data->w[A[++i]][B[++j]] + score;
137 else if (op > 0) {
138 score = score - (data->g + op * data->zzh);
139 j = j + op;
140 } else {
141 score = score - (data->g - op * data->zzh);
142 i = i - op;
143 }
144 }
145 return(score);
146 }
147
148 /* g_band1_align(A,B,M,N,up,low,tb,te,data) returns the cost of an optimum
149 conversion between
150 A[1..M] and B[1..N] and appends such a conversion to the current script.
151 tb(te)= 1 no gap-open penalty if the conversion begins(ends) with a delete.
152 tb(te)= 2 no gap-open penalty if the conversion begins(ends) with an insert.
153 */
154
g_band1_align(Uint1Ptr A,Uint1Ptr B,Int4 M,Int4 N,Int4 low,Int4 up,data_t * data)155 static Int4 g_band1_align(Uint1Ptr A, Uint1Ptr B,
156 Int4 M, Int4 N,
157 Int4 low, Int4 up, data_t *data)
158 {
159 Int4 k, v;
160 Int4 band, j;
161 Int4 leftd, rightd; /* for CC, DD, CP and DP */
162 register Int4 curd; /* current index for CC, DD CP and DP */
163 register Int4 i;
164 register Int4 c, d, e;
165 register dp_ptr ap;
166 Int4 t;
167 Int4Ptr wa;
168 Int1Ptr PNTR state, st, tmp;
169 Int4 ib;
170
171
172 /* Boundary cases: M <= 0 , N <= 0, or up-low <= 0 */
173 band = up-low+1;
174 state = (Int1Ptr PNTR) MemGet(sizeof(Int1Ptr)*(M+1), MGET_ERRPOST);
175 state[0] = (Int1Ptr) MemGet((M+1)*(band+2), MGET_ERRPOST);
176 for (i = 1; i <= M; i++) state[i] = state[i-1]+band+2;
177
178 /* Initialization */
179 leftd = 1-low;
180 rightd = up-low+1;
181
182 data->CD[leftd].CC = 0; state[0][leftd] = -1;
183 t = -data->g;
184 for(j = leftd+1; j <= rightd; j++) {
185 data->CD[j].CC = t = t - data->zzh;
186 data->CD[j-1].DD = t - data->m;
187 state[0][j] = 1;
188 }
189 data->CD[rightd+1].CC = MININT;
190 data->CD[rightd].DD = MININT;
191 data->CD[leftd-1].DD = -data->m;
192 data->CD[leftd-1].CC = MININT;
193 for (i = 1; i <= M; i++) {
194 if (i > N-up) rightd--;
195 if (leftd > 1) leftd--;
196 wa = data->w[A[i]];
197 d = data->CD[leftd].DD;
198 k = 0;
199 if ((ib = leftd+low-1+i) > 0) c = data->CD[leftd].CC+wa[B[ib]];
200 if (d > c || ib <= 0) {
201 c = d;
202 k = 2;
203 }
204 e = c - data->m;
205 if(leftd >= 1)
206 if((d -= data->zzh) >= e) {
207 data->CD[leftd - 1].DD = d;
208 k += 20;
209 }
210 else data->CD[leftd - 1].DD = e;
211
212 st = &state[i][leftd];
213 *st++ = (Uint1) k;
214 data->CD[leftd].CC = c;
215 for(curd = leftd + 1, ap = &data->CD[curd]; curd <= rightd; curd++) {
216 c = ap->CC + wa[B[curd + low - 1 + i]];
217 if((d = ap->DD) > c) {
218 if(d > e) {
219 ap->CC = d; e -= data->zzh;
220 (ap++ - 1)->DD = d - data->zzh; *st++ = 32;
221 }
222 else {
223 ap->CC = e; e -= data->zzh;
224 (ap++ - 1)->DD = d - data->zzh; *st++ = 31;
225 }
226 }
227 else if(e > c) {
228 ap->CC = e; e -= data->zzh;
229 (ap++ - 1)->DD = d - data->zzh; *st++ = 31;
230 }
231 else {
232 ap->CC = c;
233 if ((c -= data->m) > (e -= data->zzh)) {
234 e = c;
235 k = 0;
236 }
237 else k = 10;
238 if(c > (d -= data->zzh)) {
239 *st++= (Uint1) k;
240 (ap++ - 1)->DD = c;
241 }
242 else {
243 *st++ = k + 20;
244 (ap++-1)->DD = d;
245 }
246 }
247 }
248 }
249
250 /* decide which path to be traced back */
251 v = (ap-1)->CC;
252 tmp = MemGet(M+N, MGET_ERRPOST);
253 for (i = M, j = rightd, e=0, c = 0; i>=0; i--, c++) {
254 k = (t=state[i][j]) %10;
255 if (t == -1) break;
256 if (e == 1 && (t/10)%2 == 1) k = 1;
257 if (e == 2 && (t/20)== 1) k = 2;
258 if (k == 1) { j--; i++;}
259 else if (k == 2) j++;
260 e = k;
261 tmp[c] = (Uint1) e;
262 }
263 for (i = c-1; i >= 0; i--)
264 switch(tmp[i]) {
265 case 0:
266 _REP;
267 break;
268 case 1:
269 _INS(1);
270 break;
271 case 2:
272 _DEL(1);
273 break;
274 }
275 MemFree(state[0]);
276 MemFree(state);
277 MemFree(tmp);
278 return(v);
279 }
280
281