1 static char const rcsid[] = "$Id: bandalg1.c,v 6.3 2003/05/30 17:25:35 coulouri Exp $";
2 
3 /* ===========================================================================
4 *
5 *                            PUBLIC DOMAIN NOTICE
6 *               National Center for Biotechnology Information
7 *
8 *  This software/database is a "United States Government Work" under the
9 *  terms of the United States Copyright Act.  It was written as part of
10 *  the author's official duties as a United States Government employee and
11 *  thus cannot be copyrighted.  This software/database is freely available
12 *  to the public for use. The National Library of Medicine and the U.S.
13 *  Government have not placed any restriction on its use or reproduction.
14 *
15 *  Although all reasonable efforts have been taken to ensure the accuracy
16 *  and reliability of the software and data, the NLM and the U.S.
17 *  Government do not and cannot warrant the performance or results that
18 *  may be obtained by using this software or data. The NLM and the U.S.
19 *  Government disclaim all warranties, express or implied, including
20 *  warranties of performance, merchantability or fitness for any particular
21 *  purpose.
22 *
23 *  Please cite the author in any work or product based on this material.
24 *
25 * ===========================================================================*/
26 /*****************************************************************************
27 
28 File name: bandalgn1.c
29 
30 Author: Gennadiy Savchuk, Jinqhui Zhang, Tom Madden
31 
32 Contents: Functions to perform global quadratic banded alignments.
33 
34 *****************************************************************************/
35 
36 #include <bandalgn.h>
37 
38 static Int4 g_band1_align(Uint1Ptr A, Uint1Ptr B,
39 			  Int4 M, Int4 N,
40 			  Int4 low, Int4 up,
41 			  data_t *data);
42 
43 static Int4 g_band1_CHECK_SCORE(Uint1Ptr A, Uint1Ptr B,
44 				Int4 M, Int4 N,
45 				Int4 PNTR S, data_t *data);
46 
gband_quadratic(Uint1Ptr A,Uint1Ptr B,Int4 M,Int4 N,Int4Ptr PNTR matrix,PSUGapOptionsPtr option,Int4Ptr S,Int4Ptr Slen)47 Int4 LIBCALL gband_quadratic(Uint1Ptr A, Uint1Ptr B,
48 			     Int4 M, Int4 N,
49 			     Int4Ptr PNTR matrix,
50 			     PSUGapOptionsPtr option,
51 			     Int4Ptr S, Int4Ptr Slen)
52 {
53   data_t data;
54   Int4 c, i, j;
55   Int4 low, up;
56   Int4 band;
57   Int4 score;
58 
59   /* Setup global parameters */
60   data.g = option->gopen;
61   data.zzh = option->gext;
62   data.w = matrix;
63   data.m = data.g + data.zzh;
64 
65   data.sapp = S;
66   data.last = 0;
67   *Slen = 0;
68 
69   low = option->start_diag;
70   band = option->width;
71   up = band + low - 1;
72 
73   low = MIN(MAX(-M, low),MIN(N - M, 0));
74   up = MAX(MIN(N, up),MAX(N - M, 0));
75 
76   if (up < 0 || low > 0) {
77     ErrPostEx(SEV_WARNING, 0, 0,
78 	      "The band does not include (0,0) grid point");
79     return 0;
80   }
81   if (up+M < N || low+M > N) {
82     ErrPostEx(SEV_WARNING, 0, 0,
83 	      "The band does not include lower corner");
84     return 0;
85   }
86 
87   if (N <= 0) {
88     if (M > 0) DEL_(M);
89     return -gap_(M);
90   }
91   if (M <= 0) {
92     INS_(N);
93     return -gap_(N);
94   }
95   if ((band = up-low+1) <= 1) {
96     c = 0;
97     for (i = 1; i <= M; i++) {
98       REP_;
99       c += data.w[A[i]][B[i]];
100     }
101     return c;
102   }
103 
104   j = (band+2) * sizeof(Int4);
105 
106   data.CD = (dp_ptr) MemGet(sizeof(dp_node)*(band+2), MGET_ERRPOST);
107 
108   c = g_band1_align(A, B, M, N, low, up, &data);
109 
110   score = g_band1_CHECK_SCORE(A, B, M, N, S, &data);
111 
112   if (score != c) {
113     ErrPostEx(SEV_WARNING, 0, 0, "Check_Score = %ld align_score = %ld ",
114 	      (long) score, (long) c);
115 	return 0;
116   }
117   MemFree(data.CD);
118 
119   *Slen = data.sapp - S;
120 
121   return c;
122 }
123 
124 /* g_band1_CHECK_SCORE - return the score of the alignment stored in S */
125 
g_band1_CHECK_SCORE(Uint1Ptr A,Uint1Ptr B,Int4 M,Int4 N,Int4 PNTR S,data_t * data)126 static Int4 g_band1_CHECK_SCORE(Uint1Ptr A, Uint1Ptr B,
127 			Int4 M, Int4 N, Int4 PNTR S, data_t *data)
128 {
129   register Int4   i,  j, op;
130   Int4 score;
131 
132   score = i = j = op = 0;
133   while (i < M || j < N) {
134     op = *S++;
135     if (op == 0)
136       score = data->w[A[++i]][B[++j]] + score;
137     else if (op > 0) {
138       score = score - (data->g + op * data->zzh);
139       j = j + op;
140     } else {
141       score = score - (data->g - op * data->zzh);
142       i = i - op;
143     }
144   }
145   return(score);
146 }
147 
148 /* g_band1_align(A,B,M,N,up,low,tb,te,data) returns the cost of an optimum
149    conversion between
150    A[1..M] and B[1..N] and appends such a conversion to the current script.
151    tb(te)= 1  no gap-open penalty if the conversion begins(ends) with a delete.
152    tb(te)= 2  no gap-open penalty if the conversion begins(ends) with an insert.
153 */
154 
g_band1_align(Uint1Ptr A,Uint1Ptr B,Int4 M,Int4 N,Int4 low,Int4 up,data_t * data)155 static Int4 g_band1_align(Uint1Ptr A, Uint1Ptr B,
156 		  Int4 M, Int4 N,
157 		  Int4 low, Int4 up, data_t *data)
158 {
159   Int4 k, v;
160   Int4 band, j;
161   Int4 leftd, rightd;	/* for CC, DD, CP and DP */
162   register Int4 curd;	/* current index for CC, DD CP and DP */
163   register Int4 i;
164   register Int4 c, d, e;
165   register dp_ptr ap;
166   Int4 t;
167   Int4Ptr wa;
168   Int1Ptr PNTR state, st, tmp;
169   Int4 ib;
170 
171 
172   /* Boundary cases: M <= 0 , N <= 0, or up-low <= 0 */
173   band = up-low+1;
174   state = (Int1Ptr PNTR) MemGet(sizeof(Int1Ptr)*(M+1), MGET_ERRPOST);
175   state[0] = (Int1Ptr) MemGet((M+1)*(band+2), MGET_ERRPOST);
176   for (i = 1; i <= M; i++) state[i] = state[i-1]+band+2;
177 
178   /* Initialization */
179   leftd = 1-low;
180   rightd = up-low+1;
181 
182   data->CD[leftd].CC = 0; state[0][leftd] = -1;
183   t = -data->g;
184   for(j = leftd+1; j <= rightd; j++) {
185     data->CD[j].CC = t = t - data->zzh;
186     data->CD[j-1].DD = t - data->m;
187     state[0][j] = 1;
188   }
189   data->CD[rightd+1].CC = MININT;
190   data->CD[rightd].DD = MININT;
191   data->CD[leftd-1].DD = -data->m;
192   data->CD[leftd-1].CC = MININT;
193   for (i = 1; i <= M; i++) {
194     if (i > N-up) rightd--;
195     if (leftd > 1) leftd--;
196     wa = data->w[A[i]];
197     d = data->CD[leftd].DD;
198     k = 0;
199     if ((ib = leftd+low-1+i) > 0) c = data->CD[leftd].CC+wa[B[ib]];
200     if (d > c || ib <= 0) {
201       c = d;
202       k = 2;
203     }
204     e = c - data->m;
205     if(leftd >= 1)
206       if((d -= data->zzh) >= e) {
207 	data->CD[leftd - 1].DD = d;
208 	k += 20;
209       }
210       else data->CD[leftd - 1].DD = e;
211 
212     st = &state[i][leftd];
213     *st++ = (Uint1) k;
214     data->CD[leftd].CC = c;
215     for(curd = leftd + 1, ap = &data->CD[curd]; curd <= rightd; curd++) {
216       c = ap->CC + wa[B[curd + low - 1 + i]];
217       if((d = ap->DD) > c) {
218 	if(d > e) {
219 	  ap->CC = d; e -= data->zzh;
220 	  (ap++ - 1)->DD = d - data->zzh; *st++ = 32;
221 	}
222 	else {
223 	  ap->CC = e; e -= data->zzh;
224 	  (ap++ - 1)->DD = d - data->zzh; *st++ = 31;
225 	}
226       }
227       else if(e > c) {
228 	ap->CC = e; e -= data->zzh;
229 	(ap++ - 1)->DD = d - data->zzh; *st++ = 31;
230       }
231       else {
232 	ap->CC = c;
233 	if ((c -= data->m) > (e -= data->zzh)) {
234 	  e = c;
235 	  k = 0;
236 	}
237 	else k = 10;
238 	if(c > (d -= data->zzh)) {
239 	  *st++= (Uint1) k;
240 	  (ap++ - 1)->DD = c;
241 	}
242 	else {
243 	  *st++ = k + 20;
244 	  (ap++-1)->DD = d;
245 	}
246       }
247     }
248   }
249 
250   /* decide which path to be traced back */
251   v = (ap-1)->CC;
252   tmp = MemGet(M+N, MGET_ERRPOST);
253   for (i = M, j = rightd, e=0, c = 0; i>=0; i--, c++) {
254     k  = (t=state[i][j]) %10;
255     if (t == -1) break;
256     if (e == 1 && (t/10)%2 == 1) k = 1;
257     if (e == 2 && (t/20)== 1) k = 2;
258     if (k == 1) { j--; i++;}
259     else if (k == 2) j++;
260     e = k;
261     tmp[c] = (Uint1) e;
262   }
263   for (i = c-1; i >= 0; i--)
264     switch(tmp[i]) {
265     case 0:
266       _REP;
267       break;
268     case 1:
269       _INS(1);
270       break;
271     case 2:
272       _DEL(1);
273       break;
274     }
275   MemFree(state[0]);
276   MemFree(state);
277   MemFree(tmp);
278   return(v);
279 }
280 
281