1 static char const rcsid[] = "$Id: csim.c,v 6.2 2003/05/30 17:25:36 coulouri Exp $";
2
3 /* csim.c
4 * ===========================================================================
5 *
6 * PUBLIC DOMAIN NOTICE
7 * National Center for Biotechnology Information
8 *
9 * This software/database is a "United States Government Work" under the
10 * terms of the United States Copyright Act. It was written as part of
11 * the author's official duties as a United States Government employee and
12 * thus cannot be copyrighted. This software/database is freely available
13 * to the public for use. The National Library of Medicine and the U.S.
14 * Government have not placed any restriction on its use or reproduction.
15 *
16 * Although all reasonable efforts have been taken to ensure the accuracy
17 * and reliability of the software and data, the NLM and the U.S.
18 * Government do not and cannot warrant the performance or results that
19 * may be obtained by using this software or data. The NLM and the U.S.
20 * Government disclaim all warranties, express or implied, including
21 * warranties of performance, merchantability or fitness for any particular
22 * purpose.
23 *
24 * Please cite the author in any work or product based on this material.
25 *
26 * ===========================================================================
27 *
28 * File Name: csim.c
29 *
30 * Author: Jinghui Zhang
31 *
32 * Version Creation Date: 5/24/95
33 *
34 *
35 * File Description: functions for using the sim/sim2 algorithm
36 *
37 * Modifications:
38 * --------------------------------------------------------------------------
39 * Date Name Description of modification
40 *
41 * $Log: csim.c,v $
42 * Revision 6.2 2003/05/30 17:25:36 coulouri
43 * add rcsid
44 *
45 * Revision 6.1 1998/09/09 11:47:43 sirotkin
46 * MAXSEG to NLM_MAX_SEG to avoid RS6000 occasional conflict
47 *
48 * Revision 6.0 1997/08/25 18:52:57 madden
49 * Revision changed to 6.0
50 *
51 * Revision 5.1 1997/03/14 15:45:14 kans
52 * undefine DEL and INS if previously defined in simutil.h
53 *
54 * Revision 5.0 1996/05/28 13:43:15 ostell
55 * Set to revision 5.0
56 *
57 * Revision 4.1 1995/09/27 19:46:44 zjing
58 * protection for the default gap value
59 *
60 * Revision 4.0 1995/07/26 13:50:15 ostell
61 * force revision to 4.0
62 *
63 * Revision 1.2 1995/07/17 20:36:21 kans
64 * prototypes for Mac compilers
65 *
66 * Revision 1.1 1995/07/17 19:01:41 zjing
67 * Initial revision
68 *
69 *
70 *
71 * ==========================================================================
72 */
73
74 #include <simutil.h>
75
76
77 typedef struct ONE
78 {
79 Int4 COL ;
80 struct ONE PNTR NEXT ;
81 } pair, PNTR pairptr;
82
83 typedef struct NODE
84 {
85 Int4 SCORE;
86 Int4 STARI;
87 Int4 STARJ;
88 Int4 ENDI;
89 Int4 ENDJ;
90 Int4 TOP;
91 Int4 BOT;
92 Int4 LEFT;
93 Int4 RIGHT;
94 } vertex, PNTR vertexptr;
95
96 typedef struct NODE_LIST
97 {
98 vertexptr PNTR LIST; /* an array for saving k best scores */
99 vertexptr low; /* lowest score node in LIST */
100 vertexptr most; /* latestly accessed node in LIST */
101 Int4 numnode; /* the number of nodes in LIST */
102 Int4 min; /* minimum score in LIST */
103 Int4 m1, mm, n1, nn; /* boundaries of recomputed area */
104 }node_list, PNTR listptr;
105
106 typedef struct VEC
107 {
108 Int4Ptr CC, DD; /* saving matrix scores */
109 Int4Ptr RR, SS, EE, FF; /* saving start-points */
110 Int4Ptr HH, WW; /* saving matrix scores */
111 Int4Ptr II, JJ, XX, YY; /* saving start-points */
112 }vec, PNTR vecptr;
113
114 typedef struct MATRIX
115 {
116 Int2Ptr PNTR v;
117 Int4 q, r, qr;
118 }matrix;
119
120 typedef struct SCRIPT
121 {
122 Int4 PNTR sapp; /* Current script append ptr */
123 Int4 last; /* Last script op appended */
124 Int4 I, J; /* current positions of A ,B */
125 Int4 no_mat; /* number of matches */
126 Int4 no_mis; /* number of mismatches */
127 Int4 al_len; /* length of alignment */
128 }script, PNTR scriptr;
129
130
131 static pairptr z, PNTR row; /*for saving used aligned paires*/
132
133 static SeqAlignPtr SIM(SeqLocPtr loc1, SeqLocPtr loc2, CharPtr A, CharPtr B, Int4 M, Int4 N, Int4 K, Int4 nseq, FloatHi cut_off, Int2Ptr PNTR V, Int4 Q, Int4 R);
134 static void big_pass(CharPtr A, CharPtr B, Int4 M, Int4 N, Int4 K, Int4 nseq, matrix value, vecptr s_vec, listptr plist, pairptr PNTR row) ;
135 static void diff(CharPtr A, CharPtr B, Int4 M, Int4 N, Int4 tb, Int4 te, matrix value, scriptr s_ptr, pairptr PNTR row) ;
136 static Int4 diff1(CharPtr A, CharPtr B, Int4 M, Int4 N, Int4 tb, Int4 te, Int4Ptr CC, Int4Ptr DD, Int4Ptr RR, Int4Ptr SS, matrix value, scriptr s_ptr, pairptr PNTR row);
137 static void locate(CharPtr A, CharPtr B, Int4 nseq, matrix value, vecptr s_vec, BoolPtr flag, listptr plist, pairptr PNTR row) ;
138 static void small_pass(CharPtr A, CharPtr B, Int4 count, Int4 nseq, matrix value, vecptr s_vec, listptr plist, pairptr PNTR row) ;
139 static Int4 addnode(Int4 c, Int4 ci, Int4 cj, Int4 i, Int4 j, Int4 K, listptr plist) ;
140 static vertexptr findmax(listptr plist);
141 static Boolean no_cross(BoolPtr flag, listptr plist, Int4Ptr rl, Int4Ptr cl);
142 static void display(CharPtr A, CharPtr B, Int4 M, Int4 N, Int4Ptr S, Int4 AP, Int4 BP, Int4Ptr rec_a1, Int4Ptr rec_a2, Int2Ptr POS) ;
143
144 /*****************************************************************************
145 ***
146 * CSIM() return the Seq-align objects for the two Seq-locs aligned with
147 * the sim algorithm.
148 * for protein sequences, if pam_file == NULL, use the default matrix
149 * PAM 200
150 *
151 *****************************************************************************
152 ***/
153
154
155 /***************************************************************************
156 ***
157 * set_matrix: functions for set the matrix
158 *
159 ****************************************************************************
160 ***/
set_pam_200(Int2Ptr PNTR V)161 static void set_pam_200(Int2Ptr PNTR V)
162 {
163 static Int2 wgts[23][23] = { /* the PAM200 matrix */
164
165 2,-2, 0, 0,-2,-1, 0, 1,-1,-1,-2,-1,-1,-3, 1, 1, 1,-5,-3, 0, 1, 1, 0,
166 -2, 5, 0,-1,-3, 1,-1,-3, 1,-2,-2, 2, 0,-3, 0, 0,-1, 1,-4,-2, 0, 1, 0,
167 0, 0, 2, 2,-3, 0, 1, 0, 1,-1,-2, 1,-2,-2, 0, 1, 0,-4,-1,-2, 3, 2, 0,
168 0,-1, 2, 3,-4, 1, 3, 0, 0,-2,-3, 0,-2,-4,-1, 0, 0,-6,-3,-2, 4, 3, 0,
169 -2,-3,-3,-4, 8,-4,-4,-3,-3,-2,-5,-4,-4,-4,-2, 0,-2,-6, 0,-2,-3,-3, 0,
170 -1, 1, 0, 1,-4, 4, 2,-1, 2,-2,-1, 0,-1,-4, 0,-1,-1,-4,-3,-2, 2, 4, 0,
171 0,-1, 1, 3,-4, 2, 3, 0, 0,-2,-3, 0,-2,-4, 0, 0, 0,-6,-3,-2, 3, 4, 0,
172 1,-3, 0, 0,-3,-1, 0, 4,-2,-2,-3,-2,-3,-3,-1, 1, 0,-5,-4,-1, 1, 0, 0,
173 -1, 1, 1, 0,-3, 2, 0,-2, 5,-2,-2, 0,-2,-1, 0,-1,-1,-3, 0,-2, 2, 2, 0,
174 -1,-2,-1,-2,-2,-2,-2,-2,-2, 4, 2,-1, 2, 1,-2,-1, 0,-5,-1, 3,-1,-1, 0,
175 -2,-2,-2,-3,-5,-1,-3,-3,-2, 2, 4,-2, 3, 1,-2,-2,-1,-4,-1, 1,-2,-1, 0,
176 -1, 2, 1, 0,-4, 0, 0,-2, 0,-1,-2, 4, 1,-4,-1, 0, 0,-3,-4,-2, 1, 1, 0,
177 -1, 0,-2,-2,-4,-1,-2,-3,-2, 2, 3, 1, 5, 0,-2,-1, 0,-4,-2, 1,-1, 0, 0,
178 -3,-3,-2,-4,-4,-4,-4,-3,-1, 1, 1,-4, 0, 7,-4,-2,-2, 0, 5,-1,-2,-3, 0,
179 1, 0, 0,-1,-2, 0, 0,-1, 0,-2,-2,-1,-2,-4, 5, 1, 0,-5,-4,-1, 0, 1, 0,
180 1, 0, 1, 0, 0,-1, 0, 1,-1,-1,-2, 0,-1,-2, 1, 2, 1,-2,-2,-1, 1, 1, 0,
181 1,-1, 0, 0,-2,-1, 0, 0,-1, 0,-1, 0, 0,-2, 0, 1, 3,-4,-2, 0, 1, 0, 0,
182 -5, 1,-4,-6,-6,-4,-6,-5,-3,-5,-4,-3,-4, 0,-5,-2,-4,12, 0,-6,-3,-4, 0,
183 -3,-4,-1,-3, 0,-3,-3,-4, 0,-1,-1,-4,-2, 5,-4,-2,-2, 0, 7,-2,-1,-2, 0,
184 0,-2,-2,-2,-2,-2,-2,-1,-2, 3, 1,-2, 1,-1,-1,-1, 0,-6,-2, 4,-1,-1, 0,
185 1, 0, 3, 4,-3, 2, 3, 1, 2,-1,-2, 1,-1,-2, 0, 1, 1,-3,-1,-1, 4, 4, 0,
186 1, 1, 2, 3,-3, 4, 4, 0, 2,-1,-1, 1, 0,-3, 1, 1, 0,-4,-2,-1, 4, 5, 0,
187 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
188 };
189
190 Int2 alpha=23;
191 Int2 i, j;
192 CharPtr achars="ARNDCQEGHILKMFPSTWYVBZX";
193
194 for(i=0; i<alpha; ++i)
195 for(j=0; j<alpha; ++j)
196 V[achars[i]][achars[j]]=10*(wgts[i][j]);
197
198 }
199
set_pam_file(Int2Ptr PNTR V,CharPtr pam_file)200 static Boolean set_pam_file(Int2Ptr PNTR V, CharPtr pam_file)
201 {
202 FILE *fp;
203 FloatLo weight;
204 Char buf[200], alph[SIZE_MATRIX];
205 CharPtr p;
206 Int2 alph_size;
207 Int2 i, j;
208
209 if((fp=FileOpen(pam_file, "r")) == NULL)
210 {
211 Message(MSG_ERROR, "Fail to open matrix file %s, use PAM200 instead", pam_file);
212 return FALSE;
213 }
214
215 while(FileGets(buf, 199, fp)) /*get rid of empty lines*/
216 if (buf[0]==' ' || IS_UPPER(buf[0]))
217 break;
218
219 /*check the first line to get the alpha-bet order and size*/
220 for(alph_size=0, p=buf; *p!='\0'; ++p)
221 if(IS_UPPER(*p))
222 alph[alph_size++]=*p;
223
224 for(i=0; i<alph_size; ++i)
225 for (j =0; j<alph_size; ++j)
226 {
227 fscanf(fp, "%f", &weight); /**get the matrix**/
228 V[alph[i]][alph[j]]=(Int2)(10*weight);
229 }
230
231 FileClose(fp);
232 return TRUE;
233 }
234
235
DNA_matrix(Int2Ptr PNTR V,FloatHi M_val,FloatHi I_val,FloatHi V_val)236 static void DNA_matrix(Int2Ptr PNTR V, FloatHi M_val, FloatHi I_val, FloatHi V_val)
237 {
238
239 FloatHi parm_M, parm_I, parm_V;
240
241
242 /***set up values for parm_M, parm_I, parm_V**/
243 parm_M=M_val;
244 parm_M +=(parm_M > 0.0)? 0.05: -0.05;
245
246 parm_I=I_val;
247 parm_I +=(parm_I > 0.0)? 0.05: -0.05;
248
249 parm_V=V_val;
250 parm_V +=(parm_V > 0.0)? 0.05: -0.05;
251
252 /**set up DNA matrix**/
253 V['A']['A']=V['C']['C']=V['G']['G']=V['T']['T']=(Int2)(10*parm_M);
254 V['A']['G']=V['G']['A']=V['C']['T']=V['T']['C']=(Int2)(10*parm_I);
255 V['A']['C']=V['A']['T']=V['C']['A']=V['C']['G']=(Int2)(10*parm_V);
256 V['G']['C']=V['G']['T']=V['T']['A']=V['T']['G']=(Int2)(10*parm_V);
257 }
258
259
260 /*****************************************************************
261 *
262 * set_matrix(V, pspp, is_dna)
263 * set up weight matrix for SIM_1
264 *
265 * V: the matrix
266 * pspp: the parameters used for SIM
267 * is_dna: if TRUE, DNA sequences are compared
268 * return TRUE if PAM200 is set up
269 *
270 ********************************************************************/
set_matrix(Int2Ptr PNTR V,PSimPamPtr pspp,Boolean is_dna)271 static Boolean set_matrix(Int2Ptr PNTR V, PSimPamPtr pspp, Boolean is_dna)
272 {
273
274 if(is_dna)
275 {
276 DNA_matrix(V, pspp->M_val, pspp->I_val, pspp->V_val);
277 return FALSE;
278 }
279 else
280 {
281 if(pspp->pam_file == NULL)
282 {
283 set_pam_200(V);
284 return TRUE;
285 }
286 else
287 {
288 if(!set_pam_file(V, pspp->pam_file))
289 {
290 set_pam_200(V);
291 return TRUE;
292 }
293 else
294 return FALSE;
295 }
296 }
297 }
298
299
300
301
302
303 /**get the values for open a gap and gap extension for each base**/
gap_val(Int4Ptr open_gap,Int4Ptr ext_gap,FloatHi O_val,FloatHi E_val,Boolean use_default,Boolean pam_200)304 static void gap_val(Int4Ptr open_gap, Int4Ptr ext_gap, FloatHi O_val, FloatHi E_val, Boolean use_default, Boolean pam_200)
305 {
306
307 FloatHi parm_O, parm_E;
308
309 if(use_default)
310 {
311 if(pam_200) /**PAM200 is useed**/
312 {
313 parm_O=DEFAULT_PAM_O;
314 parm_E=DEFAULT_PAM_E;
315 }
316 else
317 {
318 parm_O = DEFAULT_O;
319 parm_E = DEFAULT_E;
320 }
321 }
322 else
323 {
324 parm_O=O_val;
325 parm_E=E_val;
326 }
327
328 parm_O += (parm_O >0)? 0.05 : -0.05;
329 *open_gap=(Int4)(10 * parm_O);
330
331 parm_E +=(parm_E>0)? 0.05: -0.05;
332 *ext_gap=(Int4)(10 * parm_E);
333 }
334
335
336 /***************************************************************************
337 ***
338 * CSIM(): compute K top alignment between two sequence locations (loc1,
339 * loc2).
340 * loc1, loc2: tow sequence locations. For DNA sequences, if the strand
341 * of loc2 is unknown, it will search in both orientations
342 * K: the number of alignments to compute
343 * cut_off: the cut off score for each alignment
344 * sim1_pam: Parameters for SIM1. set to NULL for default
345 *
346 *****************************************************************************
347 ***/
CSIM(SeqLocPtr loc1,SeqLocPtr loc2,Int4 K,FloatHi cut_off,PSimPamPtr sim1_pam)348 SeqAlignPtr CSIM(SeqLocPtr loc1, SeqLocPtr loc2, Int4 K, FloatHi cut_off, PSimPamPtr sim1_pam)
349 {
350
351 Boolean both_strand;
352 SeqAlignPtr out_sap, out_sapC;
353
354 CharPtr seq1, seq2;
355 Int4 seq1_len, seq2_len;
356 Int4 nseq;
357 Boolean is_dna, pam_flag;
358 Int2Ptr PNTR V;
359 Int4 Q, R;
360 Int2 i;
361
362 PSimPam psp;
363 PSimPamPtr pspp;
364
365 if(loc1 == NULL || loc2 == NULL)
366 return NULL;
367 if(loc1->choice != SEQLOC_INT || loc2->choice !=SEQLOC_INT)
368 return NULL;
369
370 if(sim1_pam == NULL)
371 {
372 DefaultPSimPam(&psp);
373 pspp = &psp;
374 }
375 else
376 {
377 /*protect the default gap open and extension value*/
378 if(sim1_pam->O != DEFAULT_O || sim1_pam->E != DEFAULT_E)
379 sim1_pam->def_gap = FALSE;
380 else
381 sim1_pam->def_gap = TRUE;
382
383 pspp = sim1_pam;
384 }
385
386 if(SeqLocCompare(loc1, loc2) == SLC_A_EQ_B)
387 nseq = 1;
388 else
389 nseq = 2;
390
391 both_strand = check_strand_mol (loc2, &is_dna);
392 if(both_strand)
393 Change_Loc_Strand(loc2, Seq_strand_plus);
394
395 /**initiate the matrix space**/
396 V=MemNew((size_t)(SIZE_MATRIX) * sizeof(Int2Ptr));
397 for(i=0; i<SIZE_MATRIX; ++i)
398 V[i]=(Int2Ptr)MemNew((size_t)(SIZE_MATRIX)*sizeof(Int2));
399
400 /**get the score matrix and the gap values**/
401 pam_flag = set_matrix(V, pspp, is_dna);
402 gap_val(&Q, &R, pspp->O, pspp->E, pspp->def_gap, pam_flag);
403
404
405
406 seq1_len = SeqLocLen(loc1);
407 seq2_len = SeqLocLen(loc2);
408 seq1 = make_sim_seq(loc1, FALSE, NULL);
409 seq2 = make_sim_seq(loc2, FALSE, NULL);
410
411 out_sap= SIM(loc1, loc2, seq1, seq2, seq1_len, seq2_len, K, nseq, cut_off, V, Q, R);
412 if(both_strand){
413 Change_Loc_Strand(loc2, Seq_strand_minus);
414 seq2 = make_sim_seq(loc2, FALSE, seq2);
415 out_sapC= SIM(loc1, loc2, seq1, seq2, seq1_len, seq2_len, K, nseq, cut_off, V, Q, R);
416 out_sap= link_align(out_sapC, out_sap);
417 out_sap = get_top_K_alignment(out_sap, K, cut_off);
418 }
419
420 MemFree(seq1);
421 MemFree(seq2);
422 for (i=0; i<SIZE_MATRIX; ++i)
423 MemFree ( V[i]);
424 MemFree(V);
425
426
427 return out_sap;
428
429
430 }/**end of CSIM**/
431
432
433
434 /* DIAG() assigns value to x if (ii,jj) is never used before */
435 #define DIAG(ii, jj, x, pvalue) \
436 { for ( tt = 1, z = row[(ii)]; z != 0; z = z->NEXT ) \
437 if ( z->COL == (jj) ) \
438 { tt = 0; break; } \
439 if ( tt ) \
440 x = ( pvalue ); \
441 }
442
443 /* replace (ss1, xx1, yy1) by (ss2, xx2, yy2) if the latter is large */
444 #define ORDER(ss1, xx1, yy1, ss2, xx2, yy2) \
445 { if ( ss1 < ss2 ) \
446 { ss1 = ss2; xx1 = xx2; yy1 = yy2; } \
447 else \
448 if ( ss1 == ss2 ) \
449 { if ( xx1 < xx2 ) \
450 { xx1 = xx2; yy1 = yy2; } \
451 else \
452 if ( xx1 == xx2 && yy1 < yy2 ) \
453 yy1 = yy2; \
454 } \
455 }
456
457
458
459 /* SIM(A,B,M,N,K,is_dna,nseq,val) reports K best non-intersecting alignments of
460 the segments of A and B in order of similarity scores. If the score
461 is below val, the alignments will be discarded*/
462
SIM(SeqLocPtr loc1,SeqLocPtr loc2,CharPtr A,CharPtr B,Int4 M,Int4 N,Int4 K,Int4 nseq,FloatHi cut_off,Int2Ptr PNTR V,Int4 Q,Int4 R)463 static SeqAlignPtr SIM(SeqLocPtr loc1, SeqLocPtr loc2, CharPtr A, CharPtr B, Int4 M, Int4 N, Int4 K, Int4 nseq, FloatHi cut_off, Int2Ptr PNTR V, Int4 Q, Int4 R)
464 {
465
466 SeqAlignPtr new, head = NULL;
467 Int4Ptr rec_a1, rec_a2;
468 Int2 POS;
469
470 Int4 endi, endj, stari, starj; /* endpoint and startpoint */
471 Int4 score; /* the max score in (plist->LIST) */
472 Int4 count; /* maximum size of list */
473 register Int4 i; /* row and column indices */
474 Int4 PNTR S; /* saving operations for diff */
475 vertexptr cur; /* temporary pointer */
476 vecptr s_vec; /*space for processing*/
477 matrix value; /*values for the matrix and gap penalty*/
478 Boolean flag; /*indicate if recomputation is necessary*/
479 Int4 rl, cl; /*left and top boundaries*/
480 scriptr s_ptr; /* script for the alignment*/
481 listptr plist; /*list for saving the best scores*/
482 Int4 q;
483
484 FloatHi re_score;
485
486
487
488 /**allocate the space for recording the result**/
489 rec_a1 = MemNew((size_t)NLM_MAXSEG * sizeof(Int4));
490 rec_a2 = MemNew((size_t)NLM_MAXSEG * sizeof(Int4));
491
492 /*get the matrix value and gap penalties*/
493 value.v=V;
494 value.q=q=Q;
495 value.r=R;
496 value.qr=Q+R;
497
498 /*initiate the siace for process*/
499 s_vec=(vecptr)MemNew(sizeof(vec));
500 /* allocate space for all vectors */
501 /*j =(size_t) (N + 1) * sizeof(Int4);*/
502 s_vec->CC = ( Int4Ptr ) MemNew((size_t)(N+1)*sizeof(Int4));
503 s_vec->DD = ( Int4Ptr ) MemNew((size_t)(N+1)*sizeof(Int4));
504 s_vec->RR = ( Int4Ptr ) MemNew((size_t)(N+1)*sizeof(Int4));
505 s_vec->SS = ( Int4Ptr ) MemNew((size_t)(N+1)*sizeof(Int4));
506 s_vec->EE = ( Int4Ptr ) MemNew((size_t)(N+1)*sizeof(Int4));
507 s_vec->FF = ( Int4Ptr ) MemNew((size_t)(N+1)*sizeof(Int4));
508 /*i = (M + 1) * sizeof(Int4);*/
509 s_vec->HH = ( Int4Ptr ) MemNew((size_t)(M+1)*sizeof(Int4));
510 s_vec->WW = ( Int4Ptr ) MemNew((size_t)(M+1)*sizeof(Int4));
511 s_vec->II = ( Int4Ptr ) MemNew((size_t)(M+1)*sizeof(Int4));
512 s_vec->JJ = ( Int4Ptr ) MemNew((size_t)(M+1)*sizeof(Int4));
513 s_vec->XX = ( Int4Ptr ) MemNew((size_t)(M+1)*sizeof(Int4));
514 s_vec->YY = ( Int4Ptr ) MemNew((size_t)(M+1)*sizeof(Int4));
515 S = ( Int4Ptr ) MemNew((size_t)(M+N+2)*sizeof(Int4));
516 row = ( pairptr PNTR ) MemNew( (size_t)(M + 1) * sizeof(pairptr));
517
518 /* set up list for each row */
519 for ( i = 1; i <= M; i++ )
520 if ( nseq == 2 )
521 row[i] = 0;
522 else
523 { row[i] = ( pairptr ) MemNew( sizeof(pair));
524 z=row[i];
525 z->COL = i;
526 z->NEXT = 0;
527 }
528
529 /*initiate the script*/
530 plist=(listptr)MemNew(sizeof(node_list));
531 plist->low=NULL;
532 plist->most=NULL;
533
534 (plist->LIST) = ( vertexptr PNTR ) MemNew((size_t) K * sizeof(vertexptr));
535 for ( i = 0; i < K ; i++ )
536 (plist->LIST)[i] = ( vertexptr ) MemNew( sizeof(vertex));
537 (plist->numnode) = (plist->min) = 0;
538 s_ptr=(scriptr)MemNew(sizeof(script));
539
540
541 big_pass(A,B,M,N,K,nseq, value, s_vec, plist, row);
542
543 /* Report the K best alignments one by one. After each alignment is
544 output, recompute part of the matrix. First determinee the size
545 of the area to be recomputed, then do the recomputation */
546
547 for ( count = K - 1; count >= 0 ; count-- )
548 {
549 POS=0;
550 if ( (plist->numnode) == 0 )
551 fatal("The number of alignments computed is too large");
552 cur = findmax(plist); /*Return a pointer to a node with max score*/
553
554 score = cur->SCORE;
555 stari = ++cur->STARI;
556 starj = ++cur->STARJ;
557 endi = cur->ENDI;
558 endj = cur->ENDJ;
559 (plist->m1) = cur->TOP;
560 (plist->mm) = cur->BOT;
561 (plist->n1) = cur->LEFT;
562 (plist->nn) = cur->RIGHT;
563 rl = endi - stari + 1;
564 cl = endj - starj + 1;
565 s_ptr->I = stari - 1;
566 s_ptr->J = starj - 1;
567 s_ptr->sapp = S;
568 s_ptr->last = 0;
569 (s_ptr->al_len) = 0;
570 (s_ptr->no_mat) = 0;
571 (s_ptr->no_mis) = 0;
572
573 diff(&A[stari]-1, &B[starj]-1,rl,cl,q,q, value, s_ptr, row);
574
575 /* Output the best alignment */
576 re_score = (FloatHi)(score/10.0);
577
578 display(&A[stari]-1,&B[starj]-1,rl,cl,S,stari,starj, rec_a1, rec_a2, &POS);
579 if(re_score >= cut_off) /*save the result to Seq-align*/
580 {
581 new = make_align(rec_a1, rec_a2, POS, re_score, NULL, loc1, loc2);
582 head = link_align(new, head);
583 }
584
585 if ( count )
586 { flag = FALSE;
587
588 locate(A,B,nseq, value, s_vec, &flag, plist, row);
589
590 if ( flag )
591 small_pass(A,B,count,nseq, value, s_vec, plist, row);
592 }
593
594 }
595 MemFree(rec_a1);
596 MemFree(rec_a2);
597 MemFree(S);
598 MemFree(s_vec->CC);
599 MemFree(s_vec->DD);
600 MemFree(s_vec->RR);
601 MemFree(s_vec->SS);
602 MemFree(s_vec->EE);
603 MemFree(s_vec->FF);
604 MemFree(s_vec->HH);
605 MemFree(s_vec->WW);
606 MemFree(s_vec->II);
607 MemFree(s_vec->JJ);
608 MemFree(s_vec->XX);
609 MemFree(s_vec->YY);
610 MemFree(s_vec);
611 MemFree(s_ptr);
612
613 for (i=1; i<=M; i++)
614 MemFree(row[i]);
615 MemFree (row);
616
617
618 for(i=0; i<K; i++)
619 MemFree((plist->LIST)[i]);
620 MemFree(plist->LIST);
621 MemFree(plist);
622
623 return head;
624 }
625
626 /* A big pass to compute K best classes */
627
big_pass(CharPtr A,CharPtr B,Int4 M,Int4 N,Int4 K,Int4 nseq,matrix value,vecptr s_vec,listptr plist,pairptr PNTR row)628 static void big_pass(CharPtr A, CharPtr B, Int4 M, Int4 N, Int4 K, Int4 nseq, matrix value, vecptr s_vec, listptr plist, pairptr PNTR row)
629 {
630 register Int4 i, j; /* row and column indices */
631 register Int4 c; /* best score at current point */
632 register Int4 f; /* best score ending with insertion */
633 register Int4 d; /* best score ending with deletion */
634 register Int4 p; /* best score at (i-1, j-1) */
635 register Int4 ci, cj; /* end-point associated with c */
636 register Int4 di, dj; /* end-point associated with d */
637 register Int4 fi, fj; /* end-point associated with f */
638 register Int4 pi, pj; /* end-point associated with p */
639 Int2Ptr va, PNTR v; /* pointer to v(A[i], B[j]) */
640 Int4 q, r, qr;
641 Boolean tt;
642 /**pairptr z;**/
643
644 q=value.q;
645 r=value.r;
646 qr=value.qr;
647 v=value.v;
648
649
650 /* Compute the matrix and save the top K best scores in (plist->LIST)
651 s_vec->CC : the scores of the current row
652 s_vec->RR and s_vec->EE : the starting point that leads to score s_vec->CC
653 s_vec->DD : the scores of the current row, ending with deletion
654 s_vec->SS and s_vec->FF : the starting point that leads to score s_vec->DD */
655 /* Initialize the 0 th row */
656 for ( j = 1; j <= N ; j++ )
657 { s_vec->CC[j] = 0;
658 s_vec->RR[j] = 0;
659 s_vec->EE[j] = j;
660 s_vec->DD[j] = - (q);
661 s_vec->SS[j] = 0;
662 s_vec->FF[j] = j;
663 }
664 for ( i = 1; i <= M; i++)
665 { c = 0; /* Initialize column 0 */
666 f = - (q);
667 ci = fi = i;
668 va = v[A[i]];
669 if ( nseq == 2 )
670 { p = 0;
671 pi = i - 1;
672 cj = fj = pj = 0;
673 }
674 else
675 { p = s_vec->CC[i];
676 pi = s_vec->RR[i];
677 pj = s_vec->EE[i];
678 cj = fj = i;
679 }
680 for ( j = (nseq == 2 ? 1 : (i+1)) ; j <= N ; j++ )
681 { f = f - r;
682 c = c - qr;
683 ORDER(f, fi, fj, c, ci, cj)
684 c = s_vec->CC[j] - qr;
685 ci = s_vec->RR[j];
686 cj = s_vec->EE[j];
687 d = s_vec->DD[j] - r;
688 di = s_vec->SS[j];
689 dj = s_vec->FF[j];
690 ORDER(d, di, dj, c, ci, cj)
691 c = 0;
692 DIAG(i, j, c, p+va[B[j]]) /* diagonal */
693 if ( c <= 0 )
694 { c = 0; ci = i; cj = j; }
695 else
696 { ci = pi; cj = pj; }
697 ORDER(c, ci, cj, d, di, dj)
698 ORDER(c, ci, cj, f, fi, fj)
699 p = s_vec->CC[j];
700 s_vec->CC[j] = c;
701 pi = s_vec->RR[j];
702 pj = s_vec->EE[j];
703 s_vec->RR[j] = ci;
704 s_vec->EE[j] = cj;
705 s_vec->DD[j] = d;
706 s_vec->SS[j] = di;
707 s_vec->FF[j] = dj;
708 if ( c > (plist->min) ) /* add the score into list */
709 (plist->min) = addnode(c, ci, cj, i, j, K, plist);
710 }
711 }
712 }
713
714 #ifdef DEL
715 #undef DEL /* in simutil.h */
716 #endif
717 #ifdef INS
718 #undef INS /* in simutil.h */
719 #endif
720
721 #define GAP(k) ((k) <= 0 ? 0 : q+r*(k)) /* k-symbol indel score */
722 #define DEL(s_ptr, k) \
723 { (s_ptr->I) += (k); \
724 (s_ptr->al_len) += (k); \
725 if ((s_ptr->last) < 0) \
726 (s_ptr->last) = s_ptr->sapp[-1] -= (k); \
727 else \
728 (s_ptr->last) = *(s_ptr->sapp)++ = -(k); \
729 }
730 /* Append "s_ptr->Insert k" op */
731 #define INS(k) \
732 { (s_ptr->J) += k; \
733 (s_ptr->al_len) += k; \
734 if ((s_ptr->last) < 0) \
735 { s_ptr->sapp[-1] = (k); *(s_ptr->sapp)++ = (s_ptr->last); } \
736 else \
737 (s_ptr->last) = *(s_ptr->sapp)++ = (k); \
738 }
739
740 /* Append "Replace" op */
741 #define REP \
742 { (s_ptr->last) = *(s_ptr->sapp)++ = 0; \
743 (s_ptr->al_len) += 1; \
744 }
745
diff(CharPtr A,CharPtr B,Int4 M,Int4 N,Int4 tb,Int4 te,matrix value,scriptr s_ptr,pairptr PNTR row)746 static void diff(CharPtr A, CharPtr B, Int4 M, Int4 N, Int4 tb, Int4 te, matrix value, scriptr s_ptr, pairptr PNTR row)
747 {
748 Int4Ptr CC, DD, RR, SS;
749
750 /*j= (N+1) * sizeof (Int4);*/
751 CC= (Int4Ptr) MemNew ((size_t)(N+1)*sizeof(Int4));
752 DD= (Int4Ptr) MemNew ((size_t)(N+1)*sizeof(Int4));
753 RR= (Int4Ptr) MemNew ((size_t)(N+1)*sizeof(Int4));
754 SS= (Int4Ptr) MemNew ((size_t)(N+1)*sizeof(Int4));
755
756 diff1(A, B, M, N, tb, te, CC, DD, RR, SS, value, s_ptr, row);
757 MemFree(CC);
758 MemFree(DD);
759 MemFree(RR);
760 MemFree(SS);
761
762 }
763
diff1(CharPtr A,CharPtr B,Int4 M,Int4 N,Int4 tb,Int4 te,Int4Ptr CC,Int4Ptr DD,Int4Ptr RR,Int4Ptr SS,matrix value,scriptr s_ptr,pairptr PNTR row)764 static Int4 diff1(CharPtr A, CharPtr B, Int4 M, Int4 N, Int4 tb, Int4 te, Int4Ptr CC, Int4Ptr DD, Int4Ptr RR, Int4Ptr SS, matrix value, scriptr s_ptr, pairptr PNTR row)
765 {
766 Int4 midi, midj, type; /* Midpoint, type, and cost */
767 Int4 midc;
768 Int4 q, r, qr; /* matrix and the gap penalty value*/
769 Int2Ptr PNTR v;
770 Boolean tt;
771 /**pairptr z;**/
772
773 register Int4 i, j;
774 register Int4 c, e, d, s;
775 Int4 t;
776 Int2 PNTR va;
777
778 q=value.q;
779 r=value.r;
780 qr=value.qr;
781 v=value.v;
782
783
784
785 /* Boundary cases: M <= 1 or N == 0 */
786
787 if (N <= 0)
788 { if (M > 0) DEL(s_ptr, M)
789 return - GAP(M);
790 }
791 if (M <= 1)
792 { if (M <= 0)
793 { INS(N);
794 return - GAP(N);
795 }
796 if (tb > te) tb = te;
797 midc = - (tb + r + GAP(N) );
798 midj = 0;
799 va = v[A[1]];
800 for (j = 1; j <= N; j++)
801 { for ( tt = 1, z = row[(s_ptr->I)+1]; z != 0; z = z->NEXT )
802 if ( z->COL == j+(s_ptr->J) )
803 { tt = 0; break; }
804 if ( tt )
805 { c = va[B[j]] - ( GAP(j-1) + GAP(N-j) );
806 if (c > midc)
807 { midc = c;
808 midj = j;
809 }
810 }
811 }
812 if (midj == 0)
813 { INS(N) DEL(s_ptr, 1) }
814 else
815 { if (midj > 1) INS(midj-1)
816 REP
817 if ( A[1] == B[midj] )
818 (s_ptr->no_mat) += 1;
819 else
820 (s_ptr->no_mis) += 1;
821 /* mark (A[(s_ptr->I)],B[J]) as used: put J into list row[(s_ptr->I)] */
822 (s_ptr->I)++; (s_ptr->J)++;
823 /**if(row[(s_ptr->I)]==NULL)
824 row[(s_ptr->I)] = ( pairptr ) MemNew( sizeof(pair));
825 (row[(s_ptr->I)])->COL = s_ptr->J;**/
826 z = (pairptr)MemNew(sizeof(pair));
827 z->COL = s_ptr->J;
828 z->NEXT = row[s_ptr->I];
829 row[s_ptr->I] = z;
830 if (midj < N) INS(N-midj)
831 }
832 return midc;
833 }
834
835 /* Divide: Find optimum midpoint (midi,midj) of cost midc */
836
837 midi = M/2; /* Forward phase: */
838 CC[0] = 0; /* Compute C(M/2,k) & D(M/2,k) for all k */
839 t = -q;
840 for (j = 1; j <= N; j++)
841 { CC[j] = t = t-r;
842 DD[j] = t-q;
843 }
844 t = -tb;
845 for (i = 1; i <= midi; i++)
846 { s = CC[0];
847 CC[0] = c = t = t-r;
848 e = t-q;
849 va = v[A[i]];
850 for (j = 1; j <= N; j++)
851 { if ((c = c - qr) > (e = e - r)) e = c;
852 if ((c = CC[j] - qr) > (d = DD[j] - r)) d = c;
853 DIAG(i+(s_ptr->I), (s_ptr->J)+j, c, s+v[A[i]][B[j]])
854 if (c < d) c = d;
855 if (c < e) c = e;
856 s = CC[j];
857 CC[j] = c;
858 DD[j] = d;
859 }
860 }
861 DD[0] = CC[0];
862
863 RR[N] = 0; /* Reverse phase: */
864 t = -q; /* Compute R(M/2,k) & S(M/2,k) for all k */
865 for (j = N-1; j >= 0; j--)
866 { RR[j] = t = t-r;
867 SS[j] = t-q;
868 }
869 t = -te;
870 for (i = M-1; i >= midi; i--)
871 { s = RR[N];
872 RR[N] = c = t = t-r;
873 e = t-q;
874 va = v[A[i+1]];
875 for (j = N-1; j >= 0; j--)
876 { if ((c = c - qr) > (e = e - r)) e = c;
877 if ((c = RR[j] - qr) > (d = SS[j] - r)) d = c;
878 DIAG(i+1+s_ptr->I, j+1+s_ptr->J, c, s+v[A[i+1]][B[j+1]])
879 if (c < d) c = d;
880 if (c < e) c = e;
881 s = RR[j];
882 RR[j] = c;
883 SS[j] = d;
884 }
885 }
886 SS[N] = RR[N];
887
888 midc = CC[0]+RR[0]; /* Find optimal midpoint */
889 midj = 0;
890 type = 1;
891 for (j = 0; j <= N; j++)
892 if ((c = CC[j] + RR[j]) >= midc)
893 if (c > midc || CC[j] != DD[j] && RR[j] == SS[j])
894 { midc = c;
895 midj = j;
896 }
897 for (j = N; j >= 0; j--)
898 if ((c = DD[j] + SS[j] + q) > midc)
899 { midc = c;
900 midj = j;
901 type = 2;
902 }
903
904 /* Conquer: recursively around midpoint */
905
906 if (type == 1)
907 { diff1(A,B,midi,midj,tb,q, CC, DD, RR, SS, value, s_ptr, row);
908 diff1(A+midi,B+midj,M-midi,N-midj,q,te, CC, DD, RR, SS, value, s_ptr, row);
909 }
910 else
911 { diff1(A,B,midi-1,midj,tb,(Int4)0, CC, DD, RR, SS, value, s_ptr, row);
912 DEL(s_ptr, 2);
913 diff1(A+midi+1,B+midj,M-midi-1,N-midj,(Int4)0,te, CC, DD, RR, SS, value, s_ptr, row);
914 }
915 return midc;
916 }
917
918
919 /* Determine the left and top boundaries of the recomputed area */
locate(CharPtr A,CharPtr B,Int4 nseq,matrix value,vecptr s_vec,BoolPtr flag,listptr plist,pairptr PNTR row)920 static void locate(CharPtr A, CharPtr B, Int4 nseq, matrix value, vecptr s_vec, BoolPtr flag, listptr plist, pairptr PNTR row)
921 {
922 register Int4 i, j; /* row and column indices */
923 register Int4 c; /* best score at current point */
924 register Int4 f; /* best score ending with insertion */
925 register Int4 d; /* best score ending with deletion */
926 register Int4 p; /* best score at (i-1, j-1) */
927 register Int4 ci, cj; /* end-point associated with c */
928 register Int4 di, dj; /* end-point associated with d */
929 register Int4 fi, fj; /* end-point associated with f */
930 register Int4 pi, pj; /* end-point associated with p */
931 short cflag, rflag; /* for recomputation */
932 Int2Ptr va; /* pointer to v(A[i], B[j]) */
933 Int4 limit; /* the bound on j */
934 Int4 q, r, qr; /*gap penalty*/
935 Int2Ptr PNTR v;
936 Boolean tt; /*flag for DIAG()*/
937 Int4 rl, cl;
938 /*pairptr z;*/ /* for DIAG()*/
939
940 q=value.q;
941 r=value.r;
942 qr=value.qr;
943 v=value.v;
944 /* Reverse pass
945 rows
946 s_vec->CC : the scores on the current row
947 s_vec->RR and s_vec->EE : the endpoints that lead to s_vec->CC
948 s_vec->DD : the deletion scores
949 s_vec->SS and s_vec->FF : the endpoints that lead to s_vec->DD
950
951 columns
952 s_vec->HH : the scores on the current columns
953 s_vec->II and s_vec->JJ : the endpoints that lead to s_vec->HH
954 s_vec->WW : the deletion scores
955 s_vec->XX and s_vec->YY : the endpoints that lead to s_vec->WW
956 */
957 for ( j = (plist->nn); j >= (plist->n1) ; j-- )
958 { s_vec->CC[j] = 0;
959 s_vec->EE[j] = j;
960 s_vec->DD[j] = - (q);
961 s_vec->FF[j] = j;
962 if ( nseq == 2 || j > (plist->mm) )
963 s_vec->RR[j] = s_vec->SS[j] = (plist->mm) + 1;
964 else
965 s_vec->RR[j] = s_vec->SS[j] = j;
966 }
967
968 for ( i = (plist->mm); i >= (plist->m1); i-- )
969 { c = p = 0;
970 f = - (q);
971 ci = fi = i;
972 pi = i + 1;
973 cj = fj = pj = (plist->nn) + 1;
974 va = v[A[i]];
975 if ( nseq == 2 || (plist->n1) > i )
976 limit = (plist->n1);
977 else
978 limit = i + 1;
979 for ( j = (plist->nn); j >= limit ; j-- )
980 { f = f - r;
981 c = c - qr;
982 ORDER(f, fi, fj, c, ci, cj)
983 c = s_vec->CC[j] - qr;
984 ci = s_vec->RR[j];
985 cj = s_vec->EE[j];
986 d = s_vec->DD[j] - r;
987 di = s_vec->SS[j];
988 dj = s_vec->FF[j];
989 ORDER(d, di, dj, c, ci, cj)
990 c = 0;
991 DIAG(i, j, c, p+va[B[j]]) /* diagonal */
992 if ( c <= 0 )
993 { c = 0; ci = i; cj = j; }
994 else
995 { ci = pi; cj = pj; }
996 ORDER(c, ci, cj, d, di, dj)
997 ORDER(c, ci, cj, f, fi, fj)
998 p = s_vec->CC[j];
999 s_vec->CC[j] = c;
1000 pi = s_vec->RR[j];
1001 pj = s_vec->EE[j];
1002 s_vec->RR[j] = ci;
1003 s_vec->EE[j] = cj;
1004 s_vec->DD[j] = d;
1005 s_vec->SS[j] = di;
1006 s_vec->FF[j] = dj;
1007 if ( c > (plist->min) )
1008 *flag = TRUE;
1009 }
1010 if ( nseq == 2 || i < (plist->n1) )
1011 { s_vec->HH[i] = s_vec->CC[(plist->n1)];
1012 s_vec->II[i] = s_vec->RR[(plist->n1)];
1013 s_vec->JJ[i] = s_vec->EE[(plist->n1)];
1014 s_vec->WW[i] = f;
1015 s_vec->XX[i] = fi;
1016 s_vec->YY[i] = fj;
1017 }
1018 }
1019
1020 for ( rl = (plist->m1), cl = (plist->n1); ; )
1021 { for ( rflag = cflag = 1; ( rflag && (plist->m1) > 1 ) || ( cflag && (plist->n1) > 1 ) ; )
1022 { if ( rflag && (plist->m1) > 1 ) /* Compute one row */
1023 { rflag = 0;
1024 (plist->m1)--;
1025 c = p = 0;
1026 f = - (q);
1027 ci = fi = (plist->m1);
1028 pi = (plist->m1) + 1;
1029 cj = fj = pj = (plist->nn) + 1;
1030 va = v[A[(plist->m1)]];
1031 for ( j = (plist->nn); j >= (plist->n1) ; j-- )
1032 { f = f - r;
1033 c = c - qr;
1034 ORDER(f, fi, fj, c, ci, cj)
1035 c = s_vec->CC[j] - qr;
1036 ci = s_vec->RR[j];
1037 cj = s_vec->EE[j];
1038 d = s_vec->DD[j] - r;
1039 di = s_vec->SS[j];
1040 dj = s_vec->FF[j];
1041 ORDER(d, di, dj, c, ci, cj)
1042 c = 0;
1043 DIAG((plist->m1), j, c, p+va[B[j]]) /* diagonal */
1044 if ( c <= 0 )
1045 { c = 0; ci = (plist->m1); cj = j; }
1046 else
1047 { ci = pi; cj = pj; }
1048 ORDER(c, ci, cj, d, di, dj)
1049 ORDER(c, ci, cj, f, fi, fj)
1050 p = s_vec->CC[j];
1051 s_vec->CC[j] = c;
1052 pi = s_vec->RR[j];
1053 pj = s_vec->EE[j];
1054 s_vec->RR[j] = ci;
1055 s_vec->EE[j] = cj;
1056 s_vec->DD[j] = d;
1057 s_vec->SS[j] = di;
1058 s_vec->FF[j] = dj;
1059 if ( c > (plist->min) )
1060 *flag = TRUE;
1061 if ( ! rflag && ( ci > rl && cj > cl || di > rl && dj > cl
1062 || fi > rl && fj > cl ) )
1063 rflag = 1;
1064 }
1065 s_vec->HH[(plist->m1)] = s_vec->CC[(plist->n1)];
1066 s_vec->II[(plist->m1)] = s_vec->RR[(plist->n1)];
1067 s_vec->JJ[(plist->m1)] = s_vec->EE[(plist->n1)];
1068 s_vec->WW[(plist->m1)] = f;
1069 s_vec->XX[(plist->m1)] = fi;
1070 s_vec->YY[(plist->m1)] = fj;
1071 if ( ! cflag && ( ci > rl && cj > cl || di > rl && dj > cl
1072 || fi > rl && fj > cl ) )
1073 cflag = 1;
1074 }
1075
1076 if ( nseq == 1 && (plist->n1) == ((plist->m1) + 1) && ! rflag )
1077 cflag = 0;
1078 if ( cflag && (plist->n1) > 1 ) /* Compute one column */
1079 { cflag = 0;
1080 (plist->n1)--;
1081 c = 0;
1082 f = - (q);
1083 cj = fj = (plist->n1);
1084 va = v[B[(plist->n1)]];
1085 if ( nseq == 2 || (plist->mm) < (plist->n1) )
1086 { p = 0;
1087 ci = fi = pi = (plist->mm) + 1;
1088 pj = (plist->n1) + 1;
1089 limit = (plist->mm);
1090 }
1091 else
1092 { p = s_vec->HH[(plist->n1)];
1093 pi = s_vec->II[(plist->n1)];
1094 pj = s_vec->JJ[(plist->n1)];
1095 ci = fi = (plist->n1);
1096 limit = (plist->n1) - 1;
1097 }
1098 for ( i = limit; i >= (plist->m1) ; i-- )
1099 { f = f - r;
1100 c = c - qr;
1101 ORDER(f, fi, fj, c, ci, cj)
1102 c = s_vec->HH[i] - qr;
1103 ci = s_vec->II[i];
1104 cj = s_vec->JJ[i];
1105 d = s_vec->WW[i] - r;
1106 di = s_vec->XX[i];
1107 dj = s_vec->YY[i];
1108 ORDER(d, di, dj, c, ci, cj)
1109 c = 0;
1110 DIAG(i, (plist->n1), c, p+va[A[i]])
1111 if ( c <= 0 )
1112 { c = 0; ci = i; cj = (plist->n1); }
1113 else
1114 { ci = pi; cj = pj; }
1115 ORDER(c, ci, cj, d, di, dj)
1116 ORDER(c, ci, cj, f, fi, fj)
1117 p = s_vec->HH[i];
1118 s_vec->HH[i] = c;
1119 pi = s_vec->II[i];
1120 pj = s_vec->JJ[i];
1121 s_vec->II[i] = ci;
1122 s_vec->JJ[i] = cj;
1123 s_vec->WW[i] = d;
1124 s_vec->XX[i] = di;
1125 s_vec->YY[i] = dj;
1126 if ( c > (plist->min) )
1127 *flag = TRUE;
1128 if ( ! cflag && ( ci > rl && cj > cl || di > rl && dj > cl
1129 || fi > rl && fj > cl ) )
1130 cflag = 1;
1131 }
1132 s_vec->CC[(plist->n1)] = s_vec->HH[(plist->m1)];
1133 s_vec->RR[(plist->n1)] = s_vec->II[(plist->m1)];
1134 s_vec->EE[(plist->n1)] = s_vec->JJ[(plist->m1)];
1135 s_vec->DD[(plist->n1)] = f;
1136 s_vec->SS[(plist->n1)] = fi;
1137 s_vec->FF[(plist->n1)] = fj;
1138 if ( ! rflag && ( ci > rl && cj > cl || di > rl && dj > cl
1139 || fi > rl && fj > cl ) )
1140 rflag = 1;
1141 }
1142 }
1143
1144 if ( (plist->m1) == 1 && (plist->n1) == 1 || no_cross(flag, plist, &rl, &cl) )
1145 break;
1146 }
1147 (plist->m1)--;
1148 (plist->n1)--;
1149 }
1150
1151 /* recompute the area on forward pass */
small_pass(CharPtr A,CharPtr B,Int4 count,Int4 nseq,matrix value,vecptr s_vec,listptr plist,pairptr PNTR row)1152 static void small_pass(CharPtr A, CharPtr B, Int4 count, Int4 nseq, matrix value, vecptr s_vec, listptr plist, pairptr PNTR row)
1153 {
1154 register Int4 i, j; /* row and column indices */
1155 register Int4 c; /* best score at current point */
1156 register Int4 f; /* best score ending with insertion */
1157 register Int4 d; /* best score ending with deletion */
1158 register Int4 p; /* best score at (i-1, j-1) */
1159 register Int4 ci, cj; /* end-point associated with c */
1160 register Int4 di, dj; /* end-point associated with d */
1161 register Int4 fi, fj; /* end-point associated with f */
1162 register Int4 pi, pj; /* end-point associated with p */
1163 Int2Ptr va; /* pointer to v(A[i], B[j]) */
1164 Int4 limit; /* lower bound on j */
1165 Int4 q, r, qr; /* gap penalties*/
1166 Int2Ptr PNTR v; /* matrix*/
1167 Boolean tt; /*flag for DIAG() */
1168 /*pairptr z;*/ /*for process DIAG()*/
1169
1170 q=value.q;
1171 r=value.r;
1172 qr=value.qr;
1173 v=value.v;
1174 for ( j = (plist->n1) + 1; j <= (plist->nn) ; j++ )
1175 { s_vec->CC[j] = 0;
1176 s_vec->RR[j] = (plist->m1);
1177 s_vec->EE[j] = j;
1178 s_vec->DD[j] = - (q);
1179 s_vec->SS[j] = (plist->m1);
1180 s_vec->FF[j] = j;
1181 }
1182 for ( i = (plist->m1) + 1; i <= (plist->mm); i++)
1183 { c = 0; /* Initialize column 0 */
1184 f = - (q);
1185 ci = fi = i;
1186 va = v[A[i]];
1187 if ( nseq == 2 || i <= (plist->n1) )
1188 { p = 0;
1189 pi = i - 1;
1190 cj = fj = pj = (plist->n1);
1191 limit = (plist->n1) + 1;
1192 }
1193 else
1194 { p = s_vec->CC[i];
1195 pi = s_vec->RR[i];
1196 pj = s_vec->EE[i];
1197 cj = fj = i;
1198 limit = i + 1;
1199 }
1200 for ( j = limit ; j <= (plist->nn) ; j++ )
1201 { f = f - r;
1202 c = c - qr;
1203 ORDER(f, fi, fj, c, ci, cj)
1204 c = s_vec->CC[j] - qr;
1205 ci = s_vec->RR[j];
1206 cj = s_vec->EE[j];
1207 d = s_vec->DD[j] - r;
1208 di = s_vec->SS[j];
1209 dj = s_vec->FF[j];
1210 ORDER(d, di, dj, c, ci, cj)
1211 c = 0;
1212 DIAG(i, j, c, p+va[B[j]]) /* diagonal */
1213 if ( c <= 0 )
1214 { c = 0; ci = i; cj = j; }
1215 else
1216 { ci = pi; cj = pj; }
1217 ORDER(c, ci, cj, d, di, dj)
1218 ORDER(c, ci, cj, f, fi, fj)
1219 p = s_vec->CC[j];
1220 s_vec->CC[j] = c;
1221 pi = s_vec->RR[j];
1222 pj = s_vec->EE[j];
1223 s_vec->RR[j] = ci;
1224 s_vec->EE[j] = cj;
1225 s_vec->DD[j] = d;
1226 s_vec->SS[j] = di;
1227 s_vec->FF[j] = dj;
1228 if ( c > (plist->min) ) /* add the score into list */
1229 (plist->min) = addnode(c, ci, cj, i, j, count, plist);
1230 }
1231 }
1232 }
1233
1234 /* Add a new node into list. */
1235
addnode(Int4 c,Int4 ci,Int4 cj,Int4 i,Int4 j,Int4 K,listptr plist)1236 static Int4 addnode(Int4 c, Int4 ci, Int4 cj, Int4 i, Int4 j, Int4 K, listptr plist)
1237 {
1238 Boolean found; /* 1 if the node is in (plist->LIST) */
1239 register Int4 d;
1240 Int4 cost;
1241
1242 found = FALSE;
1243 cost=plist->min;
1244
1245 if ( (plist->most) != 0 && (plist->most)->STARI == ci && (plist->most)->STARJ == cj )
1246 found = TRUE;
1247 else
1248 for ( d = 0; d < (plist->numnode) ; d++ )
1249 { (plist->most) = (plist->LIST)[d];
1250 if ( (plist->most)->STARI == ci && (plist->most)->STARJ == cj )
1251 { found = TRUE;
1252 break;
1253 }
1254 }
1255 if ( found )
1256 { if ( (plist->most)->SCORE < c )
1257 { (plist->most)->SCORE = c;
1258 (plist->most)->ENDI = i;
1259 (plist->most)->ENDJ = j;
1260 }
1261 if ( (plist->most)->TOP > i ) (plist->most)->TOP = i;
1262 if ( (plist->most)->BOT < i ) (plist->most)->BOT = i;
1263 if ( (plist->most)->LEFT > j ) (plist->most)->LEFT = j;
1264 if ( (plist->most)->RIGHT < j ) (plist->most)->RIGHT = j;
1265 }
1266 else
1267 { if ( (plist->numnode) == K ) /* list full */
1268 (plist->most) = (plist->low);
1269 else
1270 (plist->most) = (plist->LIST)[(plist->numnode)++];
1271 (plist->most)->SCORE = c;
1272 (plist->most)->STARI = ci;
1273 (plist->most)->STARJ = cj;
1274 (plist->most)->ENDI = i;
1275 (plist->most)->ENDJ = j;
1276 (plist->most)->TOP = (plist->most)->BOT = i;
1277 (plist->most)->LEFT = (plist->most)->RIGHT = j;
1278 }
1279 if ( (plist->numnode) == K )
1280 { if ( (plist->low) == (plist->most) || ! (plist->low) )
1281 { for ( (plist->low) = (plist->LIST)[0], d = 1; d < (plist->numnode) ; d++ )
1282 if ( (plist->LIST)[d]->SCORE < (plist->low)->SCORE )
1283 (plist->low) = (plist->LIST)[d];
1284 }
1285 return ( (plist->low)->SCORE ) ;
1286 }
1287 else
1288 return cost;
1289 }
1290
1291 /* Find and remove the largest score in list */
1292
findmax(listptr plist)1293 static vertexptr findmax(listptr plist)
1294 {
1295 vertexptr cur;
1296 register Int4 i, j;
1297
1298 for ( j = 0, i = 1; i < (plist->numnode) ; i++ )
1299 if ( (plist->LIST)[i]->SCORE > (plist->LIST)[j]->SCORE )
1300 j = i;
1301 cur = (plist->LIST)[j];
1302 if ( j != --(plist->numnode) )
1303 { (plist->LIST)[j] = (plist->LIST)[(plist->numnode)];
1304 (plist->LIST)[(plist->numnode)] = cur;
1305 }
1306 (plist->most) = (plist->LIST)[0];
1307 if ( (plist->low) == cur ) (plist->low) = (plist->LIST)[0];
1308 return ( cur );
1309 }
1310
1311 /* return 1 if no node in (plist->LIST) share vertices with the area */
1312
no_cross(BoolPtr flag,listptr plist,Int4Ptr rl,Int4Ptr cl)1313 static Boolean no_cross(BoolPtr flag, listptr plist, Int4Ptr rl, Int4Ptr cl)
1314 {
1315 vertexptr cur;
1316 register Int4 i;
1317
1318 for ( i = 0; i < (plist->numnode); i++ )
1319 { cur = (plist->LIST)[i];
1320 if ( cur->STARI <= (plist->mm) && cur->STARJ <= (plist->nn) && cur->BOT >= (plist->m1)-1 &&
1321 cur->RIGHT >= (plist->n1)-1 && ( cur->STARI < *rl || cur->STARJ < *cl ))
1322 { if ( cur->STARI < *rl ) *rl = cur->STARI;
1323 if ( cur->STARJ < *cl ) *cl = cur->STARJ;
1324 *flag = TRUE;
1325 break;
1326 }
1327 }
1328 if ( i == (plist->numnode) )
1329 return 1;
1330 else
1331 return 0;
1332 }
1333
1334 /* Alignment display routine */
1335
display(CharPtr A,CharPtr B,Int4 M,Int4 N,Int4Ptr S,Int4 AP,Int4 BP,Int4Ptr rec_a1,Int4Ptr rec_a2,Int2Ptr POS)1336 static void display(CharPtr A, CharPtr B, Int4 M, Int4 N, Int4Ptr S, Int4 AP, Int4 BP, Int4Ptr rec_a1, Int4Ptr rec_a2, Int2Ptr POS)
1337 {
1338 Int4 i, j, op, start_i, start_j, match, mis_match;
1339
1340 for (i = j = 0; i < M || j < N; ) {
1341 start_i = i;
1342 start_j = j;
1343 match = mis_match = 0;
1344 while (i < M && j < N && *S == 0) {
1345 ++i;
1346 ++j;
1347 if (A[i] == B[j])
1348 ++match;
1349 else
1350 ++mis_match;
1351 S++;
1352 }
1353 rec_a1[*POS] = AP+start_i;
1354 rec_a2[*POS] = BP+start_j;
1355 ++*POS;
1356 rec_a1[*POS] = AP+i-1;
1357 rec_a2[*POS] = BP+j-1;
1358 ++*POS;
1359
1360 if (i < M || j < N)
1361 if ((op = *S++) > 0)
1362 j += op;
1363 else
1364 i -= op;
1365 }
1366 }
1367
1368
1369