1 static char const rcsid[] = "$Id: sim4.c,v 6.5 2003/05/30 17:25:38 coulouri Exp $";
2 
3 /* sim4.c - an algorithm to align a cDNA sequence with a genomic sequence.
4 
5    *Note:
6          - when compiled with flag -DDEBUG, it gives information on the
7            (intermediate and final) block decomposition of  the sequences
8          - when compiled with flag -DSTATS, it generates the actual
9            alignment and gives the alignment scores on exon segments
10  */
11 
12 
13 /* #include <stdio.h> */
14 /* #include <string.h> */
15 /* #include <ctype.h> */
16 /* #include <math.h> */
17 
18 #include <ncbi.h>
19 #include <simutil.h>
20 
21 #define DEFAULT_W       8
22 #define D_MASK          65535	/* distribution mask : 2^16-1 */
23 #define MIN_EXON_SIZE   W/2
24 #define DIST_CUTOFF     4
25 #define PERCENT         .2
26 #define CUTOFF          .2
27 #define LL              60
28 #define M1              M-W+1
29 #define N1              N-W+1
30 #define NW              5	/* number of words used in the heuristic phase from sim3 */
31 
32 #define MAXINT          65535
33 #define ALPHA           .5
34 #define BETA            .5
35 
36 
37 #define MININT   -99999
38 #define DEF_LIMIT 1000
39 
40 typedef struct block {
41   Int4 init_pos1;
42   Int4 init_pos2;
43   Int4 end_pos1;
44   Int4 end_pos2;
45   Int4 next;
46 } Block;
47 
48 
49 
50 typedef struct table {
51   Uint4 ecode;
52   Int4 pos;
53   struct table *next;
54 } Table;
55 
56 
57 typedef struct matchelt {
58   Int4   mblock;
59   Table *mref;
60   struct matchelt *mnext;
61 } Matchelt;
62 
63 
64 typedef struct links {
65   Matchelt *lptr;
66   struct links *lnext;
67 } Links;
68 
69 
70 typedef struct threshelt {
71   Int4   tblock;		/* block in seq2; a value between 1..M-W+1 */
72   Table *tref;
73 } Threshelt;
74 
75 
76 struct edit {
77   struct edit *link;		/* previous edit instruction */
78   Int4 op;			/* INS or DEL */
79   Int4 line1;			/* line number in file1 */
80   Int4 line2;			/* line number in file2 */
81 };
82 
83 typedef struct {
84   Int1Ptr seq1, seq2;
85   Int4 M, N, W;
86   Int4 encoding[128];
87   Int4 decoding[5];
88   Int4 b_offset;
89   Int4 limit;
90 } data_t;
91 
92 #define MAX_BLOCKS      10000
93 typedef struct sim4global {	/*the global variables used in SIM4*/
94 	Block *Blist;
95 	Table ***pos_tbl;
96 	Table **lists;
97 	Links **Link;
98 	Matchelt **Matchlist;
99 }SIM4Global, PNTR SIM4GlobalPtr;
100 
101 
102 static Int4 segment(Int1Ptr, Int1Ptr, Int4, Int4, Int4, Int4, Int4, data_t *, SIM4GlobalPtr);
103 static void add_word(Uint4, Int4, SIM4GlobalPtr);
104 static Boolean init_tbl(Int4, SIM4GlobalPtr);
105 static void bld_table(Int1Ptr, Int4, Int4, data_t *, SIM4GlobalPtr);
106 static void bld_lists(Int1Ptr, Int4, Int4, data_t *, SIM4GlobalPtr);
107 static void create_mlists(Int4, Int4, Int4, SIM4GlobalPtr);
108 static void lisseq(Int4, Int4, Int4, Links **, SIM4GlobalPtr, ValNodePtr PNTR);
109 static Int4 btfind(Int4, Threshelt *, Int4);
110 static Int4 greedy(Int1Ptr, Int1Ptr, Int4, Int4, Int4, Int4,
111 		   Int4Ptr PNTR, Int4Ptr PNTR, Int4Ptr, data_t *, SIM4GlobalPtr);
112 static Int4 bgreedy(Int1Ptr, Int1Ptr, Int4, Int4, Int4, Int4, data_t *, SIM4GlobalPtr);
113 static Int4 bput_scr(struct edit *, Int4, Int4, Int4, Int4, Int4, Int4, Int4, SIM4GlobalPtr);
114 static Int4 fgreedy(Int1Ptr, Int1Ptr, Int4, Int4, Int4, Int4, data_t *, SIM4GlobalPtr);
115 static Int4 fput_scr(struct edit *, Int4, Int4, Int4, Int4, Int4, SIM4GlobalPtr);
116 static Int4 find_blocks PROTO((Int4 W, Links *lk, Int4 offset1, Int4 offset2,
117 							  data_t *data, SIM4GlobalPtr sgp));
118 static Int4 power2(Int4 n);
119 static Boolean sim4x(data_t *, SIM4GlobalPtr );
120 static Int4 snake(Int4, Int4, Int4, Int4, data_t *);
121 static Int4 rsnake(Int4, Int4, Int4, Int4, data_t *);
122 static void path(Int4, Int4, Int1, Int4, Int4, Int1, Int4,
123 		 EditScriptPtr PNTR, EditScriptPtr PNTR, data_t *);
124 
125 static Int1 locate(Int4Ptr, Int4Ptr, Int4Ptr, Int4Ptr, Int1Ptr, Int1Ptr,
126 		   Int4, Int4, Int4, data_t *data);
127 static Int4 get_dist(Int4, Int4, Int4, Int4, data_t *data);
128 static void Condense_script(EditScriptPtr);
129 static void Reverse_Script(EditScriptPtr);
130 
131 static EditScriptPtr SIM4 PROTO((Int1Ptr A, Int1Ptr B, Int4 len1, Int4 len2,
132                     Int4 INPUT_W, Int4 LIMIT, Int4Ptr dist_ptr));
133 static void S2A(EditScriptPtr, Int4Ptr);
134 static void DISPLAY(Int1Ptr, Int1Ptr, Int4, Int4, Int4Ptr, Int4, Int4);
135 static void Print_Script(EditScriptPtr head, Int4 M, Int4 N, data_t *data);
136 
137 
SIM4ALN_choice(SeqLocPtr cslp1,SeqLocPtr cslp2,Int4 limit,Int4 word_size)138 SeqAlignPtr SIM4ALN_choice(SeqLocPtr cslp1, SeqLocPtr cslp2, Int4 limit, Int4 word_size)
139 {
140 	SeqAlignPtr sap = NULL;
141 	Boolean both_strand, is_dna;
142 	Uint1 strand_2, strand_1;
143 	CharPtr A, B;
144 	Int4 len1, len2;
145 	EditScriptPtr esp, c_esp;
146 	Int4 dist = -1, c_dist;
147 	Char buf[21];
148 
149 
150 	if(cslp1 == NULL || cslp2 == NULL)
151 	{
152 		ErrPostEx(SEV_WARNING, 0, 0, "Insufficient input");
153 		return NULL;
154 	}
155 
156 	if(cslp1->choice != SEQLOC_INT || cslp2->choice !=SEQLOC_INT)
157 	{
158 		ErrPostEx(SEV_WARNING, 0, 0, "Incorrect type of Seq-loc. Only take Seq-int");
159 		return NULL;
160 	}
161 	strand_1 = SeqLocStrand(cslp1);
162 	strand_2 = SeqLocStrand(cslp2);
163 	both_strand = check_strand_mol(cslp2, &is_dna);
164 	if(!is_dna)
165 	{
166 		ErrPostEx(SEV_WARNING, 0, 0, "Sorry, SIM4 can only compare two DNA sequences");
167 		return NULL;
168 	}
169 
170 	if(both_strand)
171 	{
172 		Change_Loc_Strand(cslp1, Seq_strand_plus);
173 		Change_Loc_Strand(cslp2, Seq_strand_plus);
174 	}
175 
176 	A= make_sim_seq(cslp1, TRUE, NULL);
177 	if(A == NULL)
178 	{
179 		Change_Loc_Strand(cslp1, strand_1);
180 		Change_Loc_Strand(cslp2, strand_2);
181 		SeqIdWrite(SeqLocId(cslp1), buf, PRINTID_FASTA_LONG, 20);
182       	ErrPostEx(SEV_WARNING, 0, 0, "Fail to make the first sequence %s", buf);
183 		return NULL;
184 	}
185 
186 	B = make_sim_seq(cslp2, FALSE, NULL);
187 	if(B == NULL)
188 	{
189 		MemFree(A);
190 		Change_Loc_Strand(cslp1, strand_1);
191 		Change_Loc_Strand(cslp2, strand_2);
192 		SeqIdWrite(SeqLocId(cslp2), buf, PRINTID_FASTA_LONG, 20);
193       	ErrPostEx(SEV_WARNING, 0, 0, "Fail to make the second sequence %s", buf);
194 		return NULL;
195 	}
196 
197 	len1 = SeqLocLen(cslp1);
198 	len2 = SeqLocLen(cslp2);
199 
200 	if(limit <= 0)
201 		limit = DEF_LIMIT;
202 	word_size = MAX(DEFAULT_W, word_size);
203 	esp = SIM4((Int1Ptr)(A+1), (Int1Ptr)(B+1), len1, len2,word_size, limit,&dist);
204 	if(both_strand)
205 	{
206 		Change_Loc_Strand(cslp2,(Uint1)2);
207 		make_sim_seq(cslp2, FALSE, B);
208 		c_esp = SIM4((Int1Ptr)(A+1), (Int1Ptr)(B+1), len1, len2,word_size, limit,&c_dist);
209 		if(c_esp != NULL)
210 		{
211 			if(esp == NULL || c_dist < dist)
212 			{
213 				Script_free(esp);
214 				esp = c_esp;
215 				dist = c_dist;
216 			}
217 			else
218 			{
219 				Change_Loc_Strand (cslp2, Seq_strand_plus);
220 				Script_free(c_esp);
221 			}
222 		}
223 		else
224 			Change_Loc_Strand (cslp2, Seq_strand_plus);
225 	}
226 
227 
228 	if(esp != NULL)
229 	{
230 		sap = ConvertEditScript(esp, cslp1, cslp2, 0, 0, FALSE);
231 		Script_free(esp);
232 	}
233 	else
234 		sap = NULL;
235 	Change_Loc_Strand(cslp1, strand_1);
236 	Change_Loc_Strand(cslp2, strand_2);
237 	MemFree(A);
238 	MemFree(B);
239 	return sap;
240 
241 }
242 
243 
power2(Int4 n)244 static Int4 power2(Int4 n)
245 {
246   if(n < 0) {
247      ErrPostEx(SEV_FATAL, 1, 0,
248 	       "Positive integer power only. Perhaps invalid data.");
249      exit(1);
250   }
251 
252   if(!n) return 1;
253   else return 2 * power2(n - 1);
254 }
255 
init_tbl(Int4 W,SIM4GlobalPtr sgp)256 static Boolean init_tbl(Int4 W, SIM4GlobalPtr sgp)
257 {
258   Int4 i, j, No_branches;
259 
260   sgp->Blist = MemGet((size_t)MAX_BLOCKS * sizeof(Block), MGET_ERRPOST);
261   if(sgp->Blist == NULL)
262 	  return FALSE;
263   MemSet((Pointer)(sgp->Blist), 0, MAX_BLOCKS * sizeof(Block));
264   No_branches = power2(MAX(0, 2 * W - 16));
265   sgp->pos_tbl = (Table ***)MemNew(No_branches * sizeof(Table **));
266 
267   for(i = 0; i < No_branches; i++) {
268     sgp->pos_tbl[i] = (Table **)MemNew((D_MASK + 1) * sizeof(Table *));
269     for(j = 0; j <= D_MASK; j++)
270       sgp->pos_tbl[i][j] = NULL;
271   }
272   return TRUE;
273 }
274 
bld_table(Int1Ptr s1,Int4 M,Int4 W,data_t * data,SIM4GlobalPtr sgp)275 static void bld_table(Int1Ptr s1, Int4 M, Int4 W, data_t *data, SIM4GlobalPtr sgp)
276 {
277   Uint4 ecode;
278   Int4 i, si, temp, mask;
279   Int1Ptr q;
280 
281   /* initializing ... */
282 
283   for(i = 0; i < 128; i++) data->encoding[i] = 0;
284 
285   data->encoding['A'] = data->encoding['a'] = 0;
286   data->encoding['C'] = data->encoding['c'] = 1;
287   data->encoding['G'] = data->encoding['g'] = 2;
288   data->encoding['T'] = data->encoding['t'] = 3;
289   data->decoding[0] = 'A';
290   data->decoding[1] = 'C';
291   data->decoding[2] = 'G';
292   data->decoding[3] = 'T';
293 
294   mask = (1 << (W + W - 2)) - 1;
295 
296   q = s1 - 1;
297   ecode = 0L;
298   for(si = i = 1; i < W; ++si)
299     if((temp = data->encoding[*++q]) != 4) {
300       ecode = (ecode << 2) + temp;
301       i++;
302     }
303   for(; (*++q) && (si <= M); si++)
304     if((temp = data->encoding[*q]) != 4) {
305       ecode = ((ecode & mask) << 2) + temp;
306       add_word(ecode, (Int4)(q - s1 + 1), sgp);
307     }
308 }
309 
add_word(Uint4 ecode,Int4 pos,SIM4GlobalPtr sgp)310 static void add_word(Uint4 ecode, Int4 pos, SIM4GlobalPtr sgp)
311 {
312   Table *ptr;
313 
314   ptr = (Table *)MemNew(sizeof(Table));
315   ptr->ecode = ecode;
316   ptr->pos = pos;
317   ptr->next = sgp->pos_tbl[ecode >> 16][ecode&D_MASK];
318   sgp->pos_tbl[ecode >> 16][ecode&D_MASK] = ptr;
319 }
320 
clean_up_global_val(SIM4GlobalPtr sgp)321 static void clean_up_global_val(SIM4GlobalPtr sgp)
322 {
323 
324 	MemFree(sgp->Link);
325 	MemFree(sgp->lists);
326 	MemFree(sgp->Blist);
327 
328 }
329 
SIM4(Int1Ptr in_seq1,Int1Ptr in_seq2,Int4 in_M,Int4 in_N,Int4 in_W,Int4 LIMIT,Int4Ptr dist_ptr)330 static EditScriptPtr SIM4(Int1Ptr in_seq1, Int1Ptr in_seq2,
331 		  Int4 in_M, Int4 in_N,
332 		  Int4 in_W, Int4 LIMIT, Int4Ptr dist_ptr)
333 {
334   data_t data;
335   Int4 sflag = FALSE;
336   Int4 i, i1, diff;
337   Int1Ptr aux;
338   EditScriptPtr head, tail, left, right, new;
339    SIM4Global sg;
340 
341 
342   data.limit = LIMIT;
343 
344   /* initialize global variables ... */
345   data.seq1 = in_seq1;
346   data.seq2 = in_seq2;
347   data.M = in_M;
348   data.N = in_N;
349   data.W = in_W;
350   data.b_offset = 0;
351 
352   if(data.M > data.N) {
353     aux = data.seq1;
354     data.seq1 = data.seq2;
355     data.seq2 = aux;
356 
357     i = data.N;
358     data.N = data.M;
359     data.M = i;
360 
361     sflag = TRUE;
362   }
363 
364   /* Compute the distance between two sequences A and B */
365 
366   /* printf("A heuristic phase 1 is tried.\n");	*/
367 
368   if(!data.limit)
369 	  data.limit = DEF_LIMIT;
370   /* printf("distance limit = %d\n", data.limit); */
371   *dist_ptr = 0;
372 
373   if(!sim4x(&data, &sg))
374 	  return NULL;
375   i = 0; head = NULL;
376 
377   while((i1 = sg.Blist[i].next) && sg.Blist[i1].end_pos1) {
378     diff = get_dist(sg.Blist[i1].init_pos1 - 1,
379 		    sg.Blist[i1].init_pos2 - 1,
380 		    sg.Blist[i1].end_pos1,
381 		    sg.Blist[i1].end_pos2,
382 		    &data);
383 	if(diff == -1)
384 	{
385 		clean_up_global_val(&sg);
386 		Script_free(head);
387 		return NULL;
388 	}
389 
390     /* if(diff > MAX((Int4)(PERCENT * (sg.Blist[i1].end_pos1 - sg.Blist[i1].init_pos1 + 1)), 2 * data.W))
391 	  ErrPostEx(SEV_WARNING, 0, 0,
392 	       "Distance threshold on segment exceeded."); */
393 
394       /* printf("Warning: Distance threshold on segment exceeded.\n"); */
395 
396     path(sg.Blist[i1].init_pos1 - 1, sg.Blist[i1].init_pos2 - 1, SUB,
397 	 sg.Blist[i1].end_pos1, sg.Blist[i1].end_pos2, SUB,
398 	 diff, &left, &right, &data);
399     *dist_ptr += diff;
400 
401     if((diff = sg.Blist[i].init_pos1 - sg.Blist[i1].end_pos1 - 1) != 0) {
402       new = (EditScriptPtr) MemNew(sizeof(EditScript));
403       new->op_type = DEL;
404       new->num = diff;
405       new->next = head;
406       head = new;
407       *dist_ptr += diff;
408     }
409     if((diff = sg.Blist[i].init_pos2 - sg.Blist[i1].end_pos2 - 1) != 0) {
410       new = (EditScriptPtr) MemNew(sizeof(EditScript));
411       new->op_type = INS;
412       new->num = diff;
413       new->next = head;
414       head = new;
415     }
416     right->next = head;
417     head = left;
418     i = i1;
419   }
420   if((diff = sg.Blist[i].init_pos2 - sg.Blist[i1].end_pos2 - 1) != 0) {
421     new = (EditScriptPtr) MemNew(sizeof(EditScript));
422     new->op_type = INS;
423     new->num = diff;
424     new->next = head;
425     head = new;
426   }
427   if((diff = sg.Blist[i].init_pos1 - sg.Blist[i1].end_pos1 - 1) != 0) {
428     new = (EditScriptPtr) MemNew(sizeof(EditScript));
429     new->op_type = DEL;
430     new->num = diff;
431     new->next = head;
432     head = new;
433     *dist_ptr += diff;
434   }
435 
436   /* Merge the contiguous same operations together */
437   Condense_script(head);
438 
439   /* Reset tail (due to the condensation) */
440   tail = head;
441   while(tail->next != NULL) tail = tail->next;
442 
443 #ifdef STATS
444   /*printf("distance=%d\n", *dist_ptr); */
445   if(*dist_ptr > limit)
446 	  ErrPostEx(SEV_WARNING, 0, 0, "distance exceeds limit!");
447     /* printf("\nWarning: distance exceeds limit!\n\n"); */
448 #endif
449 
450   if(sflag == TRUE) {    /* Switch back the sequences */
451     aux = data.seq1;
452     data.seq1 = data.seq2;
453     data.seq2 = aux;
454     i = data.M;
455     data.M = data.N;
456     data.N = i;
457     Reverse_Script(head);
458   }
459 
460   /* Print_Script(head, data.M, data.N, &data); */
461   clean_up_global_val(&sg);
462 
463   return head;
464 }
465 
466 /* locate possible exons and form block list */
sim4x(data_t * data,SIM4GlobalPtr sgp)467 static Boolean sim4x(data_t *data, SIM4GlobalPtr sgp)
468 {
469   Int4 bno, i, i1, diff, dist;
470   Int4Ptr last, first;
471 
472   Int4 I1, I2, J1, J2;
473   Int4 offset1, offset2;
474   Int1 hflag;
475   Int4 p1, p2, length1, length2;
476 
477 #ifdef XFLAG
478   Int4 ulcell, u, l;
479   Int4 j, mcell, mismatch;
480   Int1 temp[50];
481 #endif
482 
483   if(data->M < data->W)  data->W = data->M;
484   if(!init_tbl(data->W, sgp))
485 	  return FALSE;
486 
487   bno = segment(data->seq1, data->seq2, data->M, data->N, data->W, 0, 0, data, sgp);
488 
489   sgp->Blist[0].init_pos1 = data->M + 1;
490   sgp->Blist[0].init_pos2 = data->N + 1;
491   sgp->Blist[bno + 1].end_pos1 = sgp->Blist[bno + 1].end_pos2 = 0;
492   sgp->Blist[bno].next = bno + 1;
493   sgp->Blist[bno + 1].next = 0;
494   data->b_offset = bno + 2;
495 
496   /* skip small blocks, as potential error source */
497   /* external thresholding: begin sequence */
498   i = data->b_offset - 1;
499   while(i && (sgp->Blist[i].end_pos1 - sgp->Blist[i].init_pos1 + 1 < 2.0 * data->W))
500     i--;
501   sgp->Blist[i].next = data->b_offset - 1;
502 
503   /* external thresholding: end sequence */
504   i = sgp->Blist[0].next;
505   while(sgp->Blist[i].end_pos1 && (sgp->Blist[i].end_pos1 - sgp->Blist[i].init_pos1 + 1 < 2 * data->W))
506     i++;
507   sgp->Blist[0].next = i;
508 
509   /* internal thresholding */
510   i = 0;
511   while((i1 = sgp->Blist[i].next) && (sgp->Blist[i1].end_pos1)) {
512     if(sgp->Blist[i1].end_pos1 - sgp->Blist[i1].init_pos1 + 1 < data->W)
513       sgp->Blist[i].next = sgp->Blist[i1].next;
514 /* =========   HERE STARTS THE NEW CODE =========== */
515 
516       else if (sgp->Blist[i1].end_pos1-sgp->Blist[i1].init_pos1+1<2*data->W) {
517 
518                Int4  tmp = sgp->Blist[i1].next;
519 
520                /* extend block i to the left */
521                p1 = sgp->Blist[i].init_pos1-1;
522                p2 = sgp->Blist[i].init_pos2-1;
523                while ((p1>0) && (p2>0) &&
524                       (p1>sgp->Blist[tmp].end_pos1-1) &&
525                       (data->seq1[p1-1] == data->seq2[p2-1])) {
526                           p1--; p2--;
527                }
528                sgp->Blist[i].init_pos1 = p1+1;
529                sgp->Blist[i].init_pos2 = p2+1;
530 
531                /* if it exists, extend sgp->Blist[i1].next to the right */
532                p1 = sgp->Blist[tmp].end_pos1;
533                p2 = sgp->Blist[tmp].end_pos2;
534                while ((p1<sgp->Blist[i].init_pos1-1) &&
535                       (data->seq1[p1] == data->seq2[p2])) {
536                           p1++; p2++;
537                }
538                sgp->Blist[tmp].end_pos1 = p1;
539                sgp->Blist[tmp].end_pos2 = p2;
540 
541                sgp->Blist[i1].init_pos2 += MAX(sgp->Blist[tmp].end_pos1+1,sgp->Blist[i1].init_pos1)
542                                       -sgp->Blist[i1].init_pos1;
543                sgp->Blist[i1].init_pos1 = MAX(sgp->Blist[tmp].end_pos1+1,sgp->Blist[i1].init_pos1);
544 
545                sgp->Blist[i1].end_pos2 -= sgp->Blist[i1].end_pos1-
546                                      MIN(sgp->Blist[i].init_pos1-1,sgp->Blist[i1].end_pos1);
547                sgp->Blist[i1].end_pos1 = MIN(sgp->Blist[i].init_pos1-1,sgp->Blist[i1].end_pos1);
548                if (sgp->Blist[i1].end_pos1-sgp->Blist[i1].init_pos1+1<data->W) {
549 
550                    if (sgp->Blist[tmp].end_pos1>=sgp->Blist[i].init_pos1) {
551 
552                         /* i and next[i1] overlap */
553                         length1 = sgp->Blist[tmp].end_pos1-sgp->Blist[tmp].init_pos1+1;
554                         length2 = sgp->Blist[i].end_pos1-sgp->Blist[i].init_pos1+1;
555 
556                         if (length2 >= length1) {
557                             sgp->Blist[tmp].end_pos2 -= sgp->Blist[tmp].end_pos1-sgp->Blist[i].init_pos1+1;
558                             sgp->Blist[tmp].end_pos1 = sgp->Blist[i].init_pos1-1;
559                         } else {
560                             sgp->Blist[i].init_pos2 += sgp->Blist[tmp].end_pos1+1-
561                                                   sgp->Blist[i].init_pos1;
562                             sgp->Blist[i].init_pos1 = sgp->Blist[tmp].end_pos1+1;
563                         }
564                    }
565                    sgp->Blist[i].next = sgp->Blist[i1].next;
566                } else i = i1;
567 
568       /* ===========   HERE ENDS  THE NEW CODE =========== */
569 
570       }
571 
572     else i = i1;
573   }
574 
575   /* separate forward greedy code for the right end */
576 
577   i1 = sgp->Blist[i=0].next;
578   diff = sgp->Blist[i].init_pos1-sgp->Blist[i1].end_pos1-1;
579   if(i1 && diff) {
580 
581     bno = fgreedy(data->seq1 + sgp->Blist[i1].end_pos1,
582 		  data->seq2 + sgp->Blist[i1].end_pos2,
583 		  diff,
584 		  sgp->Blist[i].init_pos2 - sgp->Blist[i1].end_pos2 - 1,
585 		  sgp->Blist[i1].end_pos1,sgp->Blist[i1].end_pos2,
586 		  data, sgp);
587 
588     if(bno) {
589       sgp->Blist[i].next = data->b_offset;
590       sgp->Blist[data->b_offset + bno - 1].next = i1;
591       data->b_offset += bno;
592     }
593     else if(diff <= sgp->Blist[i].init_pos2 - sgp->Blist[i1].end_pos2 - 1) {
594       /*  attempt the sim3 matching strategy  */
595       hflag = locate(&I1, &J1, &I2, &J2,
596 		     data->seq1 + sgp->Blist[i1].end_pos1,
597 		     data->seq2 + sgp->Blist[i1].end_pos2,
598 		     diff,
599 		     sgp->Blist[i].init_pos2 - sgp->Blist[i1].end_pos2 - 1,
600 		     (Int4)(BETA * data->W), data);
601       if(hflag == TRUE) {
602 	/*
603 	  printf("I1=%d, I2=%d,J1=%d,J2=%d\n",
604 	  I1+Blist[i1].end_pos1,
605 	  I2+Blist[i1].end_pos1,
606 	  J1+Blist[i1].end_pos2,
607 	  J2+Blist[i1].end_pos2);
608 	  */
609 	offset1 = sgp->Blist[i1].end_pos1;
610 	offset2 = sgp->Blist[i1].end_pos2;
611 
612 	sgp->Blist[data->b_offset].init_pos1 = offset1 + I1 + 1;
613 	sgp->Blist[data->b_offset].init_pos2 = offset2 + J1 + 1;
614 	sgp->Blist[data->b_offset].end_pos1 = offset1 + I2;
615 	sgp->Blist[data->b_offset].end_pos2 = offset2 + J2;
616 
617 	sgp->Blist[i].next = data->b_offset;
618 	sgp->Blist[data->b_offset].next = i1;
619 	data->b_offset++;
620       }
621       /* else printf("Right end of the cDNA sequence does not match the genome.\n"); */
622     }
623 
624     i = i1;
625   }
626 
627 
628   /* step 2: greedy method on unmatched subsequences */
629 
630   while(i1 = sgp->Blist[i].next) {
631     diff = sgp->Blist[i].init_pos1-sgp->Blist[i1].end_pos1-1;
632     if (diff)
633 #ifdef XFLAG
634       if ((diff<=MIN_EXON_SIZE) && sgp->Blist[i1].end_pos1){
635 	if (diff<=sgp->Blist[i].init_pos2-sgp->Blist[i1].end_pos2-1)
636 	  {
637 	    strncpy(temp,seq1+sgp->Blist[i1].end_pos1,diff);
638 	    temp[diff] = '\0';
639 	    ulcell = -1; mcell = MAXINT;
640 	    l = sgp->Blist[i1].end_pos2;
641 	    u = sgp->Blist[i].init_pos2-diff-1;
642 	    while (l<=u)
643 	      { mismatch = 0;
644 	      for (j=0; j<diff; j++)
645 		if (seq2[u+j]!=temp[j]) mismatch++;
646 	      if (((sgp->Blist[i].init_pos2-u-diff<=ALPHA*W) && (mismatch<=1)) ||
647 		  ((sgp->Blist[i].init_pos2-u-diff>ALPHA*W) && !mismatch))
648 		{ ulcell = l = u;
649 		mcell = mismatch;
650 		break; }
651 
652 	      if (mismatch<mcell)
653 		{ ulcell=u; mcell=mismatch; }
654 	      u--;
655 
656 	      mismatch = 0;
657 	      for (j=0; j<diff; j++)
658 		if (seq2[l+j]!=temp[j]) mismatch++;
659 	      if (((l+1-sgp->Blist[i1].end_pos2<=ALPHA*W) && (mismatch<=1)) ||
660 		  ((l+1-sgp->Blist[i1].end_pos2>ALPHA*W) && !mismatch))
661 		{ ulcell = u = l;
662 		mcell = mismatch;
663 		break; }
664 	      if (mismatch<mcell)
665 		{ ulcell=l; mcell=mismatch; }
666 	      l++;
667               }
668 
669 	    /* introduce block */
670 	    sgp->Blist[b_offset].init_pos1 = sgp->Blist[i1].end_pos1+1;
671 	    sgp->Blist[b_offset].init_pos2 = ulcell+1;
672 	    sgp->Blist[b_offset].end_pos1 = sgp->Blist[i].init_pos1-1;
673 	    sgp->Blist[b_offset].end_pos2 = sgp->Blist[b_offset].init_pos2+diff-1;
674 	    sgp->Blist[i].next = b_offset;
675 	    sgp->Blist[b_offset].next = i1;
676 	    b_offset++;
677 	  }
678 	else { sgp->Blist[b_offset].init_pos1 = sgp->Blist[i1].end_pos1+1;
679 	sgp->Blist[b_offset].init_pos2 = sgp->Blist[i1].end_pos2+1;
680 	sgp->Blist[b_offset].end_pos1 = sgp->Blist[i].init_pos1-1;
681 	sgp->Blist[b_offset].end_pos2 = sgp->Blist[i].init_pos2-1;
682 	sgp->Blist[i].next = b_offset;
683 	sgp->Blist[b_offset].next = i1;
684 	b_offset++;
685 	}
686 
687       }
688       else
689 #endif
690 	{
691 	  /* gap is bigger than min-exon-size or begin sequence */
692 	  /*
693 	    print_format(diff, seq1+Blist[i1].end_pos1,LL,"d1");
694 	    print_format(Blist[i].init_pos2-Blist[i1].end_pos2-1,
695 	    seq2+Blist[i1].end_pos2,LL,"d2");
696 	    */
697 
698 	  if(sgp->Blist[i1].end_pos1) {
699 
700 	    last  = (Int4Ptr )MemNew(sizeof(Int4));
701 	    first = (Int4Ptr )MemNew(sizeof(Int4));
702 
703 	    bno = greedy(data->seq1 + sgp->Blist[i1].end_pos1, data->seq2 + sgp->Blist[i1].end_pos2,
704 			 diff, sgp->Blist[i].init_pos2 - sgp->Blist[i1].end_pos2 - 1,
705 			 sgp->Blist[i1].end_pos1, sgp->Blist[i1].end_pos2,
706 			 &first, &last, &dist, data, sgp);
707 	    if(dist > DIST_CUTOFF) {
708 	      if((hflag = locate(&I1, &J1, &I2, &J2,
709 				 data->seq1 + sgp->Blist[i1].end_pos1,
710 				 data->seq2 + sgp->Blist[i1].end_pos2,
711 				 diff,
712 				 sgp->Blist[i].init_pos2 - sgp->Blist[i1].end_pos2 - 1,
713 				 (Int4)(BETA * data->W), data)) == TRUE) {
714 		offset1 = sgp->Blist[i1].end_pos1;
715 		offset2 = sgp->Blist[i1].end_pos2;
716 
717 		sgp->Blist[data->b_offset].init_pos1 = offset1+I1+1;
718 		sgp->Blist[data->b_offset].init_pos2 = offset2+J1+1;
719 		sgp->Blist[data->b_offset].end_pos1 = offset1+I2;
720 		sgp->Blist[data->b_offset].end_pos2 = offset2+J2;
721 
722 		sgp->Blist[i].next = data->b_offset;
723 		sgp->Blist[data->b_offset].next = i1;
724 		data->b_offset++;
725 	      }
726 	      else {     /* else if hflag */
727 		/* heuristic sim3 fails; use result produced by greedy */
728 		sgp->Blist[i].next = *first;
729 		sgp->Blist[*last].next = i1;
730 		data->b_offset += bno;
731 	      }
732 	    }
733 	    else {
734 	      sgp->Blist[i].next = *first;
735 	      sgp->Blist[*last].next = i1;
736 	      data->b_offset += bno;
737 	    }
738             MemFree(last);
739             MemFree(first);
740 	  }
741 	  else {
742 	    bno = bgreedy(data->seq1 + sgp->Blist[i1].end_pos1,
743 			  data->seq2 + sgp->Blist[i1].end_pos2,
744 			  diff,
745 			  sgp->Blist[i].init_pos2 - sgp->Blist[i1].end_pos2 - 1,
746 			  sgp->Blist[i1].end_pos1,
747 			  sgp->Blist[i1].end_pos2,
748 			  data, sgp);
749 	    if(bno) {
750 	      sgp->Blist[i].next = bno - 1 + data->b_offset;
751 	      sgp->Blist[data->b_offset].next = i1;
752 	      data->b_offset += bno;
753 	    }
754 	    else if(diff <= sgp->Blist[i].init_pos2 - sgp->Blist[i1].end_pos2 - 1) {
755 	      /* attempt the matching strategy of sim3  */
756 	      hflag = locate(&I1, &J1, &I2, &J2,
757 			     data->seq1 + sgp->Blist[i1].end_pos1,
758 			     data->seq2 + sgp->Blist[i1].end_pos2,
759 			     diff,
760 			     sgp->Blist[i].init_pos2 - sgp->Blist[i1].end_pos2 - 1,
761 			     (Int4)(BETA * data->W), data);
762 	      if(hflag == TRUE) {
763 		/*
764 		  printf("I1=%d, I2=%d,J1=%d,J2=%d\n",
765 		  I1+Blist[i1].end_pos1,
766 		  I2+Blist[i1].end_pos1,
767 		  J1+Blist[i1].end_pos2,
768 		  J2+Blist[i1].end_pos2);
769 		  */
770 		offset1 = sgp->Blist[i1].end_pos1;
771 		offset2 = sgp->Blist[i1].end_pos2;
772 
773 		sgp->Blist[data->b_offset].init_pos1 = offset1+I1+1;
774 		sgp->Blist[data->b_offset].init_pos2 = offset2+J1+1;
775 		sgp->Blist[data->b_offset].end_pos1 = offset1+I2;
776 		sgp->Blist[data->b_offset].end_pos2 = offset2+J2;
777 
778 		sgp->Blist[i].next = data->b_offset;
779 		sgp->Blist[data->b_offset].next = i1;
780 		data->b_offset++;
781 	      }
782 	      /* else printf("Left end of the cDNA sequence does not match the genome.\n");	*/
783 	    }
784 	  }
785 	}
786     i = i1;
787     }
788 
789   /* just printing ...*/
790 #ifdef DEBUG
791   i=0;
792   while (i=sgp->Blist[i].next)
793     {
794       fprintf (stderr,"B[%2d]:  %5d  %5d  %5d  %5d\n",i,sgp->Blist[i].init_pos1,
795 	       sgp->Blist[i].init_pos2,sgp->Blist[i].end_pos1,sgp->Blist[i].end_pos2);
796     }
797 
798   fprintf(stderr,"\n**********************************\n\n");
799   fprintf(stderr,"NORMALIZATION:\n\n");
800 #endif
801 
802 
803   /* second cross - normalize blocks; compaction step  */
804 
805   i = sgp->Blist[0].next;
806   while((i1 = sgp->Blist[i].next) && (sgp->Blist[i1].init_pos1)) {
807     /*
808       if (Blist[i].init_pos2-Blist[i1].end_pos2<=
809       Blist[i].init_pos1-Blist[i1].end_pos1+ALPHA*W)
810       */
811     if(ABS(sgp->Blist[i].init_pos2 - sgp->Blist[i].init_pos1 -
812 	   sgp->Blist[i1].end_pos2 + sgp->Blist[i1].end_pos1) <= ALPHA * data->W)
813       /* discard block */
814       {
815 	sgp->Blist[i].init_pos1 = sgp->Blist[i1].init_pos1;
816 	sgp->Blist[i].init_pos2 = sgp->Blist[i1].init_pos2;
817 	sgp->Blist[i].next = sgp->Blist[i1].next;
818       }
819     else i = i1;
820   }
821 
822 #ifdef DEBUG
823   /* just printing ... */
824   printf("\n*******************************************\n");
825   printf ("\nBlock decomposition:\n\n");
826   i=0;
827   while ((i=sgp->Blist[i].next) && (sgp->Blist[i].end_pos1))
828     {
829       printf ("Block:  I1=%5d,  I2=%5d,  J1=%5d,  J2=%5d\n", sgp->Blist[i].init_pos1,
830 	      sgp->Blist[i].init_pos2,sgp->Blist[i].end_pos1,sgp->Blist[i].end_pos2);
831     }
832   printf("\n*******************************************\n\n");
833 #endif
834   return TRUE;
835 }
836 
free_match_list_chain(Matchelt * curr)837 static void free_match_list_chain (Matchelt *curr)
838 {
839 	Matchelt *next;
840 
841 	while(curr)
842 	{
843 		next = curr->mnext;
844 		MemFree(curr);
845 		curr = next;
846 	}
847 
848 }
849 
free_table_chain(Table * curr)850 static void free_table_chain (Table *curr)
851 {
852 	Table *next;
853 
854 	while(curr)
855 	{
856 		next = curr->next;
857 		MemFree(curr);
858 		curr = next;
859 	}
860 }
861 
segment(Int1 * s1,Int1 * s2,Int4 M,Int4 N,Int4 W,Int4 offset1,Int4 offset2,data_t * data,SIM4GlobalPtr sgp)862 static Int4 segment(Int1 *s1,Int1 *s2,Int4 M,Int4 N,Int4 W,
863 		    Int4 offset1, Int4 offset2, data_t *data, SIM4GlobalPtr sgp)
864 {
865   Int4 No_branches, bno;
866   Int4 i, j;
867   Links *Lkptr;
868   ValNodePtr data_list;
869 
870   bld_table(s1, M, W, data, sgp);
871 
872   /* attach to each position in seq2 the list of
873      positions in seq1 having the same content */
874   bld_lists(s2, N, W, data, sgp);
875 
876   /* create matching lists */
877   create_mlists(M,N,W, sgp);
878 
879   /* find longest increasing subsequence of positions */
880   lisseq(M,N,W,&Lkptr, sgp, &data_list);
881 
882   bno = find_blocks(W, Lkptr, offset1, offset2, data, sgp);
883 
884   No_branches = power2(MAX(0,2*W-16));
885   for (i=0; i<No_branches; i++)
886   {
887 	for(j = 0; j <= D_MASK; j++)
888 	{
889 		if(sgp->pos_tbl[i][j] != NULL)
890 		{
891 			free_table_chain (sgp->pos_tbl[i][j]);
892 			sgp->pos_tbl[i][j] = NULL;
893 		}
894 	}
895   	MemFree(sgp->pos_tbl[i]);
896   }
897   MemFree(sgp->pos_tbl);
898 
899 
900   for(i = 0; i <= M - W + 1; i++)
901   {
902 	if(sgp->Matchlist[i] != NULL)
903 	{
904 		free_match_list_chain (sgp->Matchlist[i]);
905 		sgp->Matchlist[i] = NULL;
906 	}
907   }
908   MemFree(sgp->Matchlist);
909 
910   ValNodeFreeData(data_list);
911   return bno;
912 }
913 
lisseq(Int4 M,Int4 N,Int4 W,Links ** Lkptr,SIM4GlobalPtr sgp,ValNodePtr PNTR data_list)914 static void lisseq(Int4 M, Int4 N, Int4 W, Links **Lkptr, SIM4GlobalPtr sgp, ValNodePtr PNTR data_list)
915 {
916   Threshelt *Thresh;
917   Matchelt  *mptr;
918   Links     *new;
919   Int4 count, k, thr_size;  /* k an index of Thresh[.]; */
920   ValNodePtr curr, prev;
921   /* so k in 1,...MIN(M1,N1) */
922 
923   *data_list = NULL;
924   prev = NULL;
925   thr_size = MIN(N1, M1);    /* initially N1; modified */
926   Thresh = (Threshelt *)MemNew((thr_size + 1) * sizeof(Threshelt));
927   for(count = 1; count <= thr_size; count++) {
928     Thresh[count].tblock = N1 + 1;
929     Thresh[count].tref = NULL;
930   }
931 
932   Thresh[0].tblock = 0;
933   Thresh[0].tref = NULL;
934 
935   sgp->Link = (Links **)MemNew((thr_size + 1) * sizeof(Links *));
936   sgp->Link[0] = NULL;
937 
938   for(count = 1; count <= M1; count++) {
939     /* Link[count] = NULL; */
940     /* for j in Matchlist[i] */
941     mptr = sgp->Matchlist[count];
942     while(mptr != NULL) {
943 
944       /* find k */
945       if(!(k = btfind(mptr->mblock, Thresh, thr_size))) {
946 	ErrPostEx(SEV_FATAL, 1, 0, "Invalid data!");
947 	exit(1);
948       }
949       if((mptr->mblock<Thresh[k].tblock) ||
950 	 ((mptr->mblock==Thresh[k].tblock) &&
951 	  (count>(Thresh[k].tref)->pos))) {
952 	Thresh[k].tblock = mptr->mblock;
953 	Thresh[k].tref = mptr->mref;
954 	new = (Links *)MemNew(sizeof(Links));
955 	new->lptr = mptr;
956 	new->lnext = sgp->Link[k-1];
957 	sgp->Link[k] = new;
958 
959 	/*store the data into a chain. for freeing in the future */
960 	curr = ValNodeNew(NULL);
961 	curr->data.ptrvalue = new;
962 	if(prev == NULL)
963 		*data_list = curr;
964 	else
965 		prev->next = curr;
966 	prev = curr;
967 
968       }
969       mptr = mptr->mnext;
970     }
971   }
972   k = MIN(M1, N1);
973   while(Thresh[k].tblock == N1 + 1) k--;
974   *Lkptr = sgp->Link[k];
975   MemFree(Thresh);
976 }
977 
978 
btfind(Int4 key,Threshelt * Thr,Int4 thr_size)979 static Int4 btfind(Int4 key, Threshelt *Thr, Int4 thr_size)
980 {
981   Int4 l, u, index;
982 
983   l = 1; u = thr_size;
984 
985   while(l <= u) {
986     index = (Int4)(l + u)/2;
987     if((Thr[index - 1].tblock<key) && (key <= Thr[index].tblock))
988       return index;
989     else
990       if(key <= Thr[index].tblock) u = index - 1;
991       else l = index + 1;
992   }
993   return 0;
994 }
995 
996 /* static void code2str(Uint4 ecode, Int1 *temp, int W) */
997 /* { int i, mask = 3; */
998 
999 /*   temp[W] = '\0'; */
1000 /*   for (i=W-1; i>=0; i--) */
1001 /*      { temp[i] = decoding[mask&ecode]; */
1002 /*        ecode = ecode>>2; */
1003 /*      } */
1004 /* } */
1005 
1006 /* Int4 get_seq(Int1 *filename, Int1 **seq) */
1007 /* {  */
1008 /*   FILE *fp; */
1009 /*   Int1 buffer[1000], *buf, *s; */
1010 /*   int nlines=0; */
1011 
1012 /*   fp = ckopen(filename,"r"); */
1013 /*   while (fgets(buffer,1000,fp)!=NULL) */
1014 /*         if (buffer[0]!='>') nlines++; */
1015 /*   s = *seq = (Int1 *)MemNew(nlines*LL+1); */
1016 /*   rewind(fp); */
1017 /*   while (fgets(buffer,1000,fp)!=NULL) */
1018 /*       if (buffer[0]!='>') { */
1019 /*             buf = buffer; */
1020 /*             while ((*buf) && (*buf!='\n')) */
1021 /*                *s++ = *buf++; */
1022 /*       } */
1023 /*   *s = '\0'; */
1024 /*   return s-*seq; */
1025 /* } */
1026 
create_mlists(Int4 M,Int4 N,Int4 W,SIM4GlobalPtr sgp)1027 static void create_mlists(Int4 M, Int4 N, Int4 W, SIM4GlobalPtr sgp)
1028      /* Matchlist[i] - retains the list corresponding to position i+W-1
1029    in sequence 2 (the long one ) */
1030 {
1031   Int4 i;
1032   Matchelt *new;
1033   Table *ptr;
1034 
1035   sgp->Matchlist = (Matchelt **)MemNew((M - W + 2)*sizeof(Matchelt *));
1036   for(i = 0; i <= M - W + 1; i++)
1037     sgp->Matchlist[i] = NULL;
1038 
1039   /* traverse s2 */
1040   for(i = 1; i <= N - W + 1; i++) {
1041     ptr = sgp->lists[i];
1042     while(ptr != NULL) {
1043       new = (Matchelt *)MemNew(sizeof(Matchelt));
1044       new->mblock = i;
1045       new->mnext = sgp->Matchlist[ptr->pos - W + 1];
1046       new->mref = ptr;
1047       sgp->Matchlist[ptr->pos - W + 1] = new;
1048       ptr = ptr->next;
1049     }
1050   }
1051 }
1052 
bld_lists(Int1 * s2,Int4 N,Int4 W,data_t * data,SIM4GlobalPtr sgp)1053 static void bld_lists(Int1 *s2, Int4 N, Int4 W, data_t *data, SIM4GlobalPtr sgp)
1054 {
1055   Int4 i, si;
1056   Uint4 ecode, mask, temp;
1057   Int1 *s, *marker;
1058 
1059   sgp->lists = (Table **)MemNew((N - W + 2) * sizeof(Table *));
1060   for(i = 0; i <= N - W + 1; i++)   sgp->lists[i] = NULL;
1061   mask = (1 << (W + W - 2)) - 1;
1062   s = s2 - 1;
1063 
1064   ecode = 0L;
1065   for(i = si = 1; i < W; ++si)
1066     if((temp = data->encoding[*++s]) != 4) {
1067       ecode = (ecode << 2) + temp;
1068       i++;
1069     }
1070   marker = s+1;
1071   for(; (*++s) && (si <= N); si++)
1072     if((temp = data->encoding[*s]) != 4) {
1073       ecode = ((ecode & mask) << 2) + temp;
1074       sgp->lists[s - marker + 1] = sgp->pos_tbl[ecode >> 16][ecode&D_MASK];
1075     }
1076 }
1077 
1078 
find_blocks(Int4 W,Links * lk,Int4 offset1,Int4 offset2,data_t * data,SIM4GlobalPtr sgp)1079 static Int4 find_blocks(Int4 W, Links *lk, Int4 offset1, Int4 offset2, data_t *data, SIM4GlobalPtr sgp)
1080 {
1081   Links *lkptr;
1082 
1083   Int4 i, bno;
1084   Int4 ppos1, ppos2, cpos2, cpos1;
1085 
1086   lkptr = lk;
1087   bno = 0;
1088   ppos2 = MAXINT;
1089   cpos1 = cpos2 = ppos1 = 0;
1090   while(lkptr != NULL) {
1091     cpos2 = lkptr->lptr->mblock+W-1;
1092     cpos1 = lkptr->lptr->mref->pos;
1093 
1094     if(abs(ppos2 - ppos1 - cpos2 + cpos1) > ALPHA * W)  {
1095       if((bno < 2) || (sgp->Blist[bno - 1 + data->b_offset].init_pos1 - offset1 - cpos1 > W)) {
1096 	  sgp->Blist[bno + data->b_offset].init_pos1 = ppos1 + offset1;
1097 	  sgp->Blist[bno + data->b_offset].init_pos2 = ppos2 + offset2;
1098 	  bno++;
1099 	}
1100       sgp->Blist[bno + data->b_offset].end_pos1 = cpos1 + offset1;
1101       sgp->Blist[bno + data->b_offset].end_pos2 = cpos2 + offset2;
1102     }
1103     ppos1 = cpos1; ppos2 = cpos2;
1104     lkptr = lkptr->lnext;
1105   }
1106   if((bno < 2) || (sgp->Blist[bno - 1 + data->b_offset].init_pos1 - offset1 - cpos1 > W)) {
1107     sgp->Blist[bno + data->b_offset].init_pos1 = cpos1 + offset1;
1108     sgp->Blist[bno + data->b_offset].init_pos2 = cpos2 + offset2;
1109   }
1110   else bno--;
1111 
1112   if((bno > 1) && (sgp->Blist[bno - 1 + data->b_offset].init_pos1 <= W + offset1)) bno--;
1113 
1114   sgp->Blist[data->b_offset].next = data->b_offset + 1;
1115   sgp->Blist[bno + data->b_offset + 1].end_pos1 = sgp->Blist[bno + data->b_offset + 1].end_pos2 = 0;
1116 
1117   for(i = 1; i <= bno; i++) {
1118     sgp->Blist[i + data->b_offset].next = i + data->b_offset + 1;
1119     cpos1 = MAX(sgp->Blist[i + data->b_offset].init_pos1 - W + 1,
1120 		sgp->Blist[i + 1 + data->b_offset].end_pos1 + 1);
1121     sgp->Blist[i + data->b_offset].init_pos2 =
1122       MAX(sgp->Blist[i + data->b_offset].init_pos2 - sgp->Blist[i + data->b_offset].init_pos1 + cpos1,
1123 	  sgp->Blist[i + 1 + data->b_offset].end_pos2 + 1);
1124     sgp->Blist[i + data->b_offset].init_pos1 = cpos1;
1125   }
1126 
1127   sgp->Blist[bno + data->b_offset].next = 0;
1128 
1129   return bno;
1130 }
1131 
link_to_data_list(Pointer data,ValNodePtr PNTR head,ValNodePtr PNTR prev)1132 static void link_to_data_list(Pointer data, ValNodePtr PNTR head, ValNodePtr PNTR prev)
1133 {
1134 	ValNodePtr curr;
1135 
1136              curr = ValNodeNew(NULL);
1137              curr->data.ptrvalue = data;
1138              if(*prev == NULL)
1139 		*head = curr;
1140 	     else
1141 		(*prev)->next = curr;
1142 	     *prev = curr;
1143 }
1144 
1145 
bgreedy(Int1 * s1,Int1 * s2,Int4 m,Int4 n,Int4 offset1,Int4 offset2,data_t * data,SIM4GlobalPtr sgp)1146 static Int4 bgreedy(Int1 *s1, Int1 *s2, Int4 m, Int4 n,
1147 		    Int4 offset1, Int4 offset2, data_t *data, SIM4GlobalPtr sgp)
1148 {
1149   Int4
1150     col,			/* column number */
1151     row,			/* row number */
1152     max_d,			/* bound on the length of edit script */
1153     d,				/* current distance */
1154     k,				/* current diagonal */
1155     bno=0,			/* returned number of blocks */
1156     DELTA,			/* n-m  */
1157     ORIGIN,
1158     lower,			/* boundaries for searching diagonals */
1159     upper;
1160 
1161   Int4     *last_d, *temp_d;	/* row containing the last d */
1162   struct edit **script,		/* corresponding edit script */
1163     **temp_script;		/* temporary edit script */
1164   struct edit *new;
1165   ValNodePtr data_list = NULL, prev = NULL;
1166 
1167   DELTA = n-m;
1168   max_d = DIST_CUTOFF;
1169 
1170   /*
1171     if ((DELTA<0) &&
1172     (abs(DELTA) > MAX((Int4)(ALPHA*DEFAULT_W),(Int4)(CUTOFF*n))))
1173     fprintf(stderr,"\nWarning: data do not support proper segmentation!\n\n");
1174     */
1175 
1176   ORIGIN = m;
1177   for(row = m, col = n; row > 0 && col > 0 && (s1[row - 1] == s2[col - 1]); row--,col--);
1178 
1179   if(row == 0) {
1180     /* hit first row; stop search */
1181     sgp->Blist[data->b_offset].init_pos1 = offset1 + 1;
1182     sgp->Blist[data->b_offset].end_pos1 = offset1 + m;
1183     sgp->Blist[data->b_offset].init_pos2 = offset2 - m + n + 1;
1184     sgp->Blist[data->b_offset].end_pos2 = offset2 + n;
1185 
1186     return ++bno;
1187   }
1188   if(col == 0) {
1189     /* hit first column */
1190     sgp->Blist[data->b_offset].init_pos1 = offset1 + m - n + 1;
1191     sgp->Blist[data->b_offset].end_pos1 = offset1 + m;
1192     sgp->Blist[data->b_offset].init_pos2 = offset2 + 1;
1193     sgp->Blist[data->b_offset].end_pos2 = offset2 + n;
1194 
1195     return ++bno;
1196   }
1197 
1198   last_d = (Int4Ptr )MemNew((m + n + 1) * sizeof(Int4));
1199   temp_d = (Int4Ptr )MemNew((m + n + 1) * sizeof(Int4));
1200   script = (struct edit **)MemNew((m + n + 1)*sizeof(struct edit *));
1201   temp_script = (struct edit **)MemNew((m + n + 1)*sizeof(struct edit *));
1202 
1203   for(k = 0; k <= m + n; script[k] = NULL, last_d[k++] = m + 1);
1204   last_d[ORIGIN+DELTA] = row;
1205 
1206   lower = ORIGIN + DELTA - 1;
1207   upper = ORIGIN + DELTA + 1;
1208 
1209   for(d = 1; d <= max_d ; ++d)  {
1210 
1211     /* for each relevant diagonal ... */
1212     for(k = lower; k <= upper; k++) {
1213       /* get space for the next edit instruction */
1214       new = (struct edit *) MemNew(sizeof(struct edit));
1215 	  link_to_data_list((Pointer)new, &data_list, &prev);
1216 
1217       /* find a d on diagonal k */
1218       if (k==-d+DELTA+ORIGIN) {
1219 
1220 	/* move left from the last d-1 on diagonal k+1 */
1221 	row = last_d[k+1];
1222 	new->link = script[k+1];
1223 	new->op = INS;
1224       } else if (k==d+DELTA+ORIGIN) {
1225 
1226 	/* move up from the last d-1 on diagonal k-1 */
1227 	row = last_d[k-1]-1;
1228 	new->link = script[k-1];
1229 	new->op = DEL;
1230       } else if ((last_d[k]-1<=last_d[k+1]) &&
1231 		 (last_d[k]-1<=last_d[k-1]-1)) {
1232 	/* substitution */
1233 	row = last_d[k]-1;
1234 	new->link = script[k];
1235 	new->op = SUB;
1236 
1237       } else if ((last_d[k-1]-1<=last_d[k+1]) &&
1238 		 (last_d[k-1]-1<=last_d[k]-1)) {
1239 	/* move right from the last d-1 on diagonal k-1 */
1240 	row = last_d[k-1]-1;
1241 	new->link = script[k-1];
1242 	new->op = DEL;
1243       } else  {
1244 
1245 	/* move left from the last d-1 on diagonal k+1 */
1246 	row = last_d[k+1];
1247 	new->link = script[k+1];
1248 	new->op = INS;
1249       }
1250       /* code common to the three cases */
1251       new->line1 = row;
1252       new->line2 = col = row + k - ORIGIN;
1253       temp_script[k] = new;
1254 
1255       /* slide up the diagonal */
1256       while (row > 0 && col > 0 && (s1[row-1]==s2[col-1])) {
1257 	--row;
1258 	--col;
1259       }
1260       temp_d[k] = row;
1261 
1262       if (row == 0 && col == 0) {
1263 	/* hit northwest corner; have the answer */
1264 	script[k] = temp_script[k];
1265 		MemFree(last_d); MemFree(temp_d);
1266 	bno = bput_scr(script[k],m,n,offset1,offset2,0,0,data->b_offset, sgp);
1267         MemFree(script);
1268         MemFree(temp_script);
1269         ValNodeFreeData(data_list);
1270 
1271 	return bno;
1272       }
1273       if (row == 0) {
1274 
1275 	/* hit last row, don't look to the right;
1276 	   record first and final insertions in the
1277 	   list -- possibly identical.
1278 	   */
1279 	script[k] = temp_script[k];
1280 
1281 	/* record first INS */
1282 	new = (struct edit *) MemNew(sizeof(struct edit));
1283 	link_to_data_list((Pointer)new, &data_list, &prev);
1284 	new->line1 = 0;
1285 	new->line2 = col = --k - ORIGIN;
1286 	new->op = INS;
1287 	new->link = script[k+1];
1288 	script[k] = new;
1289 
1290 	/* record last INS */
1291 	new = (struct edit *) MemNew(sizeof(struct edit));
1292 	link_to_data_list((Pointer)new, &data_list, &prev);
1293 	new->line1 = 0;
1294 	new->line2 = col = 0;
1295 	new->op = INS;
1296 	new->link = script[k];
1297 	script[ORIGIN] = new;
1298 
1299 	MemFree(last_d); MemFree(temp_d);
1300 	bno = bput_scr(script[ORIGIN],m,n,offset1,offset2,0,0,data->b_offset, sgp);
1301     MemFree(script);
1302     MemFree(temp_script);
1303     ValNodeFreeData(data_list);
1304 
1305 	return bno;
1306       }
1307 
1308       if (col == 0) {
1309 
1310 	/* hit last column, don't look forward;
1311 	   record first and final deletions in the
1312 	   list, possibly identical.
1313 	   */
1314 	script[k] = temp_script[k];
1315 
1316 	/* record first DEL */
1317 	new = (struct edit *) MemNew(sizeof(struct edit));
1318 	link_to_data_list((Pointer)new, &data_list, &prev);
1319 	new->line1 = -(++k) + ORIGIN;
1320 	new->line2 = 0;
1321 	new->op = DEL;
1322 	new->link = script[k-1];
1323 	script[k] = new;
1324 
1325 	/* record last DEL */
1326 	new = (struct edit *) MemNew(sizeof(struct edit));
1327 	link_to_data_list((Pointer)new, &data_list, &prev);
1328 	new->line1 = 0;
1329 	new->line2 = 0;
1330 	new->op = DEL;
1331 	new->link = script[k];
1332 	script[ORIGIN] = new;
1333 
1334 
1335 	bno = bput_scr(script[ORIGIN],m,n,offset1,offset2,0,0,data->b_offset, sgp);
1336 	MemFree(last_d); MemFree(temp_d);
1337 
1338     MemFree(script);
1339     MemFree(temp_script);
1340     ValNodeFreeData(data_list);
1341 
1342 	return bno;
1343       }
1344 
1345     }
1346     for (k=lower; k<=upper; k++) {
1347       script[k] = temp_script[k];
1348       last_d[k] = temp_d[k];
1349     }
1350 
1351     --lower;
1352     ++upper;
1353   }
1354   MemFree(last_d);
1355   MemFree(temp_d);
1356   MemFree(script);
1357   MemFree(temp_script);
1358   ValNodeFreeData(data_list);
1359 
1360   return bno=0;
1361 }
1362 
fgreedy(Int1 * s1,Int1 * s2,Int4 m,Int4 n,Int4 offset1,Int4 offset2,data_t * data,SIM4GlobalPtr sgp)1363 static Int4 fgreedy(Int1 *s1, Int1 *s2, Int4 m, Int4 n, Int4 offset1, Int4 offset2, data_t *data, SIM4GlobalPtr sgp)
1364 {
1365   Int4     col,                    /* column number */
1366           row,                    /* row number */
1367           max_d,                  /* bound on the length of the edit script */
1368           d,                      /* current compressed distance */
1369           k,                      /* current diagonal */
1370           bno=0,                  /* returned number of blocks */
1371           DELTA,                  /* n-m  */
1372           ORIGIN,
1373           lower,
1374           upper;
1375   Int4     *last_d, *temp_d;       /* column containing the last p */
1376   struct edit **script,           /* corresponding edit script */
1377               **temp_script;
1378   struct edit *new;
1379   ValNodePtr data_list = NULL, prev = NULL;
1380 
1381 
1382   DELTA = n-m;
1383   max_d = DIST_CUTOFF;
1384 /*
1385   if ((DELTA<0) &&
1386       (abs(DELTA) > MAX((int)(ALPHA*DEFAULT_W),(int)(CUTOFF*n))))
1387         fprintf(stderr,"\nWarning: data do not support proper segmentation!\n\n");
1388 */
1389 
1390   ORIGIN = m;
1391   for (row=0, col=0; col<n && row<m && (s1[row]==s2[col]); row++, col++);
1392 
1393   if (row == m) {
1394        /* hit last row; stop search */
1395 
1396        sgp->Blist[data->b_offset].init_pos1 = offset1+1;
1397        sgp->Blist[data->b_offset].end_pos1 = offset1+m;
1398        sgp->Blist[data->b_offset].init_pos2 = offset2+1;
1399        sgp->Blist[data->b_offset].end_pos2 = offset2+m;
1400 
1401        return ++bno;
1402   }
1403 
1404   if (col == n) {
1405        /* hit last column */
1406 
1407        sgp->Blist[data->b_offset].init_pos1 = offset1+1;
1408        sgp->Blist[data->b_offset].end_pos1 = offset1+n;
1409        sgp->Blist[data->b_offset].init_pos2 = offset2+1;
1410        sgp->Blist[data->b_offset].end_pos2 = offset2+n;
1411 
1412        return ++bno;
1413   }
1414 
1415 
1416   last_d = (Int4Ptr )MemNew((m+n+1)*sizeof(Int4));
1417   temp_d = (Int4Ptr )MemNew((m+n+1)*sizeof(Int4));
1418   script = (struct edit **)MemNew((m+n+1)*sizeof(struct edit *));
1419   temp_script = (struct edit **)MemNew((m+n+1)*sizeof(struct edit *));
1420 
1421   for (k=0; k<=m+n; script[k] = NULL, last_d[k++]=-1);
1422   last_d[ORIGIN] = row;
1423 
1424   lower = ORIGIN - 1;
1425   upper = ORIGIN + 1;
1426 
1427   for (d = 1; d <= max_d ; ++d)  {
1428 
1429           /* for each relevant diagonal ... */
1430           for (k = lower; k <= upper; k++) {
1431                /* get space for the next edit instruction */
1432                new = (struct edit *) MemNew(sizeof(struct edit));
1433 			   link_to_data_list((Pointer)new, &data_list, &prev);
1434 
1435                /* find a d on diagonal k */
1436                if (k==-d+ORIGIN) {
1437 
1438                         /* move down from the last d-1 on diagonal k+1 */
1439                         row = last_d[k+1]+1;
1440                         new->link = script[k+1];
1441                         new->op = DEL;
1442                } else if (k==d+ORIGIN) {
1443 
1444                         /* move right from the last d-1 on diagonal k-1 */
1445                         row = last_d[k-1];
1446                         new->link = script[k-1];
1447                         new->op = INS;
1448                } else if ((last_d[k]>=last_d[k+1]) &&
1449                           (last_d[k]+1>=last_d[k-1])) {
1450 
1451                         /* substitution */
1452                         row = last_d[k]+1;
1453                         new->link = script[k];
1454                         new->op = SUB;
1455                } else if ((last_d[k+1]+1>=last_d[k-1]) &&
1456                           (last_d[k+1]>=last_d[k])) {
1457 
1458                         /* move down from the last d-1 on diagonal k+1 */
1459                         row = last_d[k+1]+1;
1460                         new->link = script[k+1];
1461                         new->op = DEL;
1462                } else {
1463 
1464                         /* move right from the last d-1 on diagonal k-1 */
1465                         row = last_d[k-1];
1466                         new->link = script[k-1];
1467                         new->op = INS;
1468                }
1469 
1470                /* code common to the three cases */
1471                new->line1 = row;
1472                new->line2 = col = row + k - ORIGIN;
1473                temp_script[k] = new;
1474 
1475                /* slide down the diagonal */
1476                if (row>=0)
1477                while (row < m && col < n && (s1[row]==s2[col])) {
1478                                 ++row;
1479                                 ++col;
1480                }
1481                temp_d[k] = row;
1482 
1483                if (row == m && col == n) {
1484                    /* hit southeast corner; have the answer */
1485                    script[k] = temp_script[k];
1486                    bno = fput_scr(script[k],offset1,offset2,m,n,data->b_offset, sgp);
1487 					MemFree(last_d); MemFree(temp_d);
1488 
1489 					MemFree(script);
1490 					MemFree(temp_script);
1491 					ValNodeFreeData(data_list);
1492 
1493                    return bno;
1494                }
1495                if (row == m) {
1496                    /* hit last row; don't look further */
1497                    script[k] = temp_script[k];
1498 
1499                    /* record first INS */
1500                    new = (struct edit *) MemNew(sizeof(struct edit));
1501 				   link_to_data_list((Pointer)new, &data_list, &prev);
1502                    new->line1 = m;
1503                    new->line2 = col = m + (++k) - ORIGIN;
1504                    new->op = INS;
1505                    new->link = script[k-1];
1506                    script[k] = new;
1507 
1508                    /* record last INS */
1509                    new = (struct edit *) MemNew(sizeof(struct edit));
1510 				   link_to_data_list((Pointer)new, &data_list, &prev);
1511                    new->line1 = m;
1512                    new->line2 = n;
1513                    new->op = INS;
1514                    new->link = script[k];
1515                    script[ORIGIN + DELTA] = new;
1516 
1517                    MemFree(last_d); MemFree(temp_d);
1518                    bno = fput_scr(script[ORIGIN+DELTA],offset1,offset2,m,n,data->b_offset, sgp);
1519 
1520 					MemFree(script);
1521 					MemFree(temp_script);
1522 					ValNodeFreeData(data_list);
1523 
1524                    return bno;
1525                }
1526 
1527                if (col == n) {
1528                    /* hit last column; don't look further */
1529                    script[k] = temp_script[k];
1530 
1531                    /* record first DEL */
1532                    new = (struct edit *) MemNew(sizeof(struct edit));
1533 				   link_to_data_list((Pointer)new, &data_list, &prev);
1534                    new->line1 = n - (--k) + ORIGIN;
1535                    new->line2 = n;
1536                    new->op = DEL;
1537                    new->link = script[k+1];
1538                    script[k] = new;
1539 
1540                    /* record last DEL */
1541                    new = (struct edit *) MemNew(sizeof(struct edit));
1542 				   link_to_data_list((Pointer)new, &data_list, &prev);
1543                    new->line1 = m;
1544                    new->line2 = n;
1545                    new->op = DEL;
1546                    new->link = script[k];
1547                    script[ORIGIN] = new;
1548 
1549                    MemFree(last_d); MemFree(temp_d);
1550                    bno = fput_scr(script[ORIGIN+DELTA],offset1,offset2,m,n,data->b_offset, sgp);
1551 					MemFree(script);
1552 					MemFree(temp_script);
1553 					ValNodeFreeData(data_list);
1554                    return bno;
1555                }
1556 
1557 
1558           }
1559           for (k=lower; k<=upper; k++) {
1560                script[k] = temp_script[k];
1561                last_d[k] = temp_d[k];
1562           }
1563 
1564           --lower;
1565           ++upper;
1566      }
1567 	 ValNodeFreeData(data_list);
1568      MemFree(last_d);
1569      MemFree(temp_d);
1570      MemFree(script);
1571      MemFree(temp_script);
1572 
1573      return bno=0;
1574 }
1575 
free_2D_Int(Int4 ** val,Int4 size)1576 static void free_2D_Int(Int4 ** val, Int4 size)
1577 {
1578 	Int4 i;
1579 
1580 	for(i = 0; i<size; ++i)
1581 		MemFree(val[i]);
1582 	MemFree(val);
1583 }
1584 
free_this_script(struct edit *** val,Int4 size)1585 static void free_this_script (struct edit ***val, Int4 size)
1586 {
1587 	Int4 i;
1588 
1589 	for(i = 0; i<size; ++i)
1590 		MemFree(val[i]);
1591 	MemFree(val);
1592 }
1593 
greedy(Int1 * s1,Int1 * s2,Int4 m,Int4 n,Int4 offset1,Int4 offset2,Int4Ptr * first,Int4Ptr * last,Int4Ptr dist,data_t * data,SIM4GlobalPtr sgp)1594 static Int4 greedy(Int1 *s1, Int1 *s2, Int4 m, Int4 n,
1595 		   Int4 offset1, Int4 offset2,
1596 		   Int4Ptr *first, Int4Ptr *last, Int4Ptr dist, data_t *data, SIM4GlobalPtr sgp)
1597 {
1598   Int4     col,                    /* column number */
1599           d,                      /* current distance */
1600           k,                      /* current diagonal */
1601           max_d,                  /* bound on size of edit script */
1602           bno=0,                  /* returned number of blocks */
1603           Cost,
1604           blower,flower,          /* boundaries for searching diagonals */
1605           bupper,fupper,
1606           row,                    /* row number */
1607           DELTA,                  /* n-m  */
1608           ORIGIN;
1609   Int4     back, forth;            /* backward and forward limits at exit */
1610 
1611   Int4     **blast_d, **flast_d;   /* rows containing the last d */
1612   Int4     *min_row, *min_diag,    /* MIN (b)/ MAX (f) row (and diagonal) */
1613           *max_row, *max_diag;    /* reached for cost d=0, ... m.  */
1614 
1615   struct edit ***bscript,         /* corresponding edit scripts */
1616               ***fscript;
1617   struct edit *new;
1618   ValNodePtr data_list = NULL, prev = NULL;
1619 
1620   DELTA = n-m;
1621   max_d = m+1;
1622   if (DELTA<0)
1623       { /* if (abs(DELTA) > MAX((Int4)(ALPHA*DEFAULT_W),(Int4)(CUTOFF*n)))
1624             fprintf(stderr,"\nWarning: data do not support proper segmentation!\n\n");
1625 */
1626 
1627         /* record block, anyways */
1628         sgp->Blist[data->b_offset].init_pos1 = offset1+1;
1629         sgp->Blist[data->b_offset].init_pos2 = offset2+1;
1630         sgp->Blist[data->b_offset].end_pos1 = offset1+m;
1631         sgp->Blist[data->b_offset].end_pos2 = offset2+n;
1632         **last = **first = data->b_offset;
1633 
1634         *dist = -1;
1635         return ++bno;
1636       }
1637 
1638   ORIGIN = m;
1639   for (row=m, col=n; row>0 && col>0 && (s1[row-1]==s2[col-1]); row--,col--);
1640 
1641   if (row == 0) {
1642        /* hit last row; stop search */
1643        sgp->Blist[data->b_offset].init_pos1 = offset1+1;
1644        sgp->Blist[data->b_offset].end_pos1 = offset1+m;
1645        sgp->Blist[data->b_offset].init_pos2 = offset2-m+n+1;
1646        sgp->Blist[data->b_offset].end_pos2 = offset2+n;
1647        **last = **first = data->b_offset;
1648 
1649        *dist = 0;
1650        return ++bno;
1651   }
1652 
1653   blast_d = (Int4Ptr *)MemNew((m+1)*sizeof(Int4Ptr ));
1654   bscript = (struct edit ***)MemNew((m+1)*sizeof(struct edit **));
1655   for (d=0; d<=m; d++) {
1656        blast_d[d] = (Int4Ptr )MemNew((m+n+1)*sizeof(Int4));
1657        bscript[d] = (struct edit **)MemNew((m+n+1)*sizeof(struct edit *));
1658   }
1659 
1660   for (d=0; d<=m; d++)
1661        for (k=0; k<=m+n; blast_d[d][k++]=m+1);
1662   blast_d[0][ORIGIN+DELTA] = row;
1663   bscript[0][ORIGIN+DELTA] = NULL;
1664 
1665   blower = ORIGIN + DELTA - 1;
1666   bupper = ORIGIN + DELTA + 1;
1667 
1668   for (row=0; row<n && row<m && (s1[row]==s2[row]); row++);
1669 
1670   if (row == m) {
1671        /* hit last row; stop search */
1672        sgp->Blist[data->b_offset].init_pos1 = offset1+1;
1673        sgp->Blist[data->b_offset].end_pos1 = offset1+m;
1674        sgp->Blist[data->b_offset].init_pos2 = offset2+1;
1675        sgp->Blist[data->b_offset].end_pos2 = offset2+m;
1676        **last = **first = data->b_offset;
1677 
1678        *dist = 0;
1679 	   free_this_script (bscript, m+1);
1680 	   free_2D_Int(blast_d, m+1);
1681 
1682        return ++bno;
1683   }
1684 
1685   flast_d = (Int4Ptr *)MemNew((m+1)*sizeof(Int4Ptr ));
1686   fscript = (struct edit ***)MemNew((m+1)*sizeof(struct edit **));
1687   for (d=0; d<=m; d++) {
1688        flast_d[d] = (Int4Ptr )MemNew((m+n+1)*sizeof(Int4));
1689        fscript[d] = (struct edit **)MemNew((m+n+1)*sizeof(struct edit *));
1690   }
1691 
1692 
1693   for (d=0; d<=m; d++)
1694        for (k=0; k<=m+n; flast_d[d][k++]=-1);
1695   flast_d[0][ORIGIN] = row;
1696   fscript[0][ORIGIN] = NULL;
1697 
1698   flower = ORIGIN - 1;
1699   fupper = ORIGIN + 1;
1700 
1701   max_row = (Int4Ptr )MemNew((m+1)*sizeof(Int4));
1702   min_row = (Int4Ptr )MemNew((m+1)*sizeof(Int4));
1703   max_diag = (Int4Ptr )MemNew((m+1)*sizeof(Int4));
1704   min_diag = (Int4Ptr )MemNew((m+1)*sizeof(Int4));
1705 
1706   for (d=1; d<=m; d++) {
1707        min_row[d] = m+1;
1708        max_row[d] = -1;
1709   }
1710   min_row[0] = blast_d[0][ORIGIN+DELTA];
1711   min_diag[0] = ORIGIN+DELTA;
1712   max_row[0] = flast_d[0][ORIGIN];
1713   max_diag[0] = ORIGIN;
1714 
1715   d = 1;
1716   while (d <= max_d) {
1717 
1718         /* for each relevant diagonal ... */
1719         for (k = blower; k <= bupper; k++) {
1720              /* get space for the next edit instruction */
1721              new = (struct edit *)MemNew(sizeof(struct edit));
1722 			 link_to_data_list((Pointer)new, &data_list, &prev);
1723 
1724              /* find a d on diagonal k */
1725              if (k==-d+DELTA+ORIGIN) {
1726 
1727                       /* move left from the last d-1 on diagonal k+1 */
1728                       row = blast_d[d-1][k+1];
1729                       new->link = bscript[d-1][k+1];
1730                       new->op = INS;
1731              }
1732              else if (k==d+DELTA+ORIGIN) {
1733 
1734                       /* move up from the last d-1 on diagonal k-1 */
1735                       row = blast_d[d-1][k-1]-1;
1736                       new->link = bscript[d-1][k-1];
1737                       new->op = DEL;
1738              } else if ((blast_d[d-1][k]<=blast_d[d-1][k+1]) &&
1739                         (blast_d[d-1][k]-1<=blast_d[d-1][k-1])) {
1740 
1741                       /* substitution */
1742                       row = blast_d[d-1][k]-1;
1743                       new->link = bscript[d-1][k];
1744                       new->op = SUB;
1745 
1746              } else if ((blast_d[d-1][k-1]<=blast_d[d-1][k+1]-1) &&
1747                         (blast_d[d-1][k-1]<=blast_d[d-1][k]-1)) {
1748                       /* move right from the last d-1 on diagonal k-1 */
1749                       row = blast_d[d-1][k-1]-1;
1750                       new->link = bscript[d-1][k-1];
1751                       new->op = DEL;
1752              } else  {
1753                       /* move left from the last d-1 on diagonal k+1 */
1754                       row = blast_d[d-1][k+1];
1755                       new->link = bscript[d-1][k+1];
1756                       new->op = INS;
1757              }
1758              /* code common to the three cases */
1759              new->line1 = row;
1760              new->line2 = col = row + k - ORIGIN;
1761              bscript[d][k] = new;
1762 
1763              /* slide up the diagonal */
1764              while (row > 0 && col > 0 && (s1[row-1]==s2[col-1])) {
1765                              --row;
1766                              --col;
1767              }
1768              blast_d[d][k] = row;
1769 
1770              if (row == 0 && col == 0)
1771                         if (d <= max_d) {
1772                             /* hit northwest corner; have the answer */
1773                             bno = bput_scr(bscript[d][k],m,n,offset1,offset2,0,0,data->b_offset, sgp);
1774                             **first = data->b_offset+bno-1;
1775                             **last = data->b_offset;
1776 
1777                             /* for (Cost=0; Cost<=m; Cost++) {
1778                                  MemFree(blast_d[Cost]); MemFree(flast_d[Cost]);
1779                                  MemFree(bscript[Cost]); MemFree(fscript[Cost]);
1780                             } */
1781 							free_this_script (bscript, m+1);
1782 							free_this_script (fscript, m+1);
1783 							free_2D_Int(blast_d, m+1);
1784 							free_2D_Int(flast_d, m+1);
1785 
1786                             MemFree(min_row); MemFree(min_diag);
1787                             MemFree(max_row); MemFree(max_diag);
1788 							ValNodeFreeData(data_list);
1789 
1790                             *dist = d;
1791                             return bno;
1792              } else goto fin;
1793 
1794              if (row == 0)
1795                         /* hit last row; fill in insertions */
1796                         if (d <= max_d) {
1797                              new = (struct edit *) MemNew(sizeof(struct edit));
1798 							 link_to_data_list((Pointer)new, &data_list, &prev);
1799                              new->line1 = 0;
1800                              new->line2 = col = --k - ORIGIN;
1801                              new->op = INS;
1802                              new->link = bscript[d][k+1];
1803                              bscript[d][k] = new;
1804 
1805                              /* record last INS */
1806                              new = (struct edit *)MemNew(sizeof(struct edit));
1807 							 link_to_data_list((Pointer)new, &data_list, &prev);
1808                              new->line1 = 0;
1809                              new->line2 = col = 0;
1810                              new->op = INS;
1811                              new->link = bscript[d][k];
1812 
1813                              bno = bput_scr(new,m,n,offset1,offset2,0,0,data->b_offset, sgp);
1814                              **first = data->b_offset+bno-1;
1815                              **last = data->b_offset;
1816 
1817                              /* for (Cost = 0; Cost<=m; Cost++) {
1818                                   MemFree(blast_d[Cost]); MemFree(flast_d[Cost]);
1819                                   MemFree(bscript[Cost]); MemFree(fscript[Cost]);
1820                              }
1821                              MemFree(min_row); MemFree(min_diag);
1822                              MemFree(max_row); MemFree(max_diag);*/
1823 							free_this_script (bscript, m+1);
1824 							free_this_script (fscript, m+1);
1825 							free_2D_Int(blast_d, m+1);
1826 							free_2D_Int(flast_d, m+1);
1827 
1828                             MemFree(min_row); MemFree(min_diag);
1829                             MemFree(max_row); MemFree(max_diag);
1830 							ValNodeFreeData(data_list);
1831 
1832 
1833                              *dist = d;
1834                              return bno;
1835 
1836                         } else goto fin;
1837 
1838              if (col == 0)
1839                         if (d <= max_d) {
1840                              /* hit last column; fill in deletions */
1841                              /* record first DEL */
1842                              new = (struct edit *) MemNew(sizeof(struct edit));
1843 							 link_to_data_list((Pointer)new, &data_list, &prev);
1844                              new->line1 = -(++k) + ORIGIN;
1845                              new->line2 = 0;
1846                              new->op = DEL;
1847                              new->link = bscript[d][k-1];
1848                              bscript[d][k] = new;
1849 
1850                              /* record last DEL */
1851                              new = (struct edit *)MemNew(sizeof(struct edit));
1852 							 link_to_data_list((Pointer)new, &data_list, &prev);
1853                              new->line1 = 0;
1854                              new->line2 = 0;
1855                              new->op = DEL;
1856                              new->link = bscript[d][k];
1857 
1858                              bno = bput_scr(new,m,n,offset1,offset2,0,0,data->b_offset, sgp);
1859                              **first = data->b_offset+bno-1;
1860                              **last = data->b_offset;
1861 
1862                              /* for (Cost = 0; Cost<=m; Cost++) {
1863                                   MemFree(blast_d[Cost]); MemFree(flast_d[Cost]);
1864                                   MemFree(bscript[Cost]); MemFree(fscript[Cost]);
1865                              }
1866                              MemFree(min_row); MemFree(min_diag);
1867                              MemFree(max_row); MemFree(max_diag);*/
1868 
1869 							free_this_script (bscript, m+1);
1870 							free_this_script (fscript, m+1);
1871 							free_2D_Int(blast_d, m+1);
1872 							free_2D_Int(flast_d, m+1);
1873 
1874                             MemFree(min_row); MemFree(min_diag);
1875                             MemFree(max_row); MemFree(max_diag);
1876 							ValNodeFreeData(data_list);
1877 
1878 
1879                              *dist = d;
1880                              return bno;
1881                         } else goto fin;
1882 
1883         }     /* for k */
1884 
1885         min_row[d] = blast_d[d][DELTA+ORIGIN];
1886         min_diag[d] = DELTA+ORIGIN;
1887         for (k=blower; k<=bupper; ++k)
1888              if (blast_d[d][k]<min_row[d]) {
1889                        min_row[d] = blast_d[d][k];
1890                        min_diag[d] = k;
1891         }
1892 
1893         /* record cell, if paths overlap with minimum combined cost */
1894         /* obs: it suffices to search up to Cost=min(d-1,(max_d-d)) */
1895         for (Cost=0; Cost<d; Cost++)
1896              if ((min_row[d]<=max_row[Cost]) &&
1897                  (max_d > d+Cost)) {
1898                            max_d = d+Cost;
1899                            back = d;
1900                            forth = Cost;
1901                            break;
1902              }
1903 
1904         --blower;
1905         ++bupper;
1906 
1907 
1908         /* for each relevant diagonal ... */
1909         for (k = flower; k <= fupper; k++) {
1910                /* get space for the next edit instruction */
1911                new = (struct edit *) MemNew(sizeof(struct edit));
1912 			   link_to_data_list((Pointer)new, &data_list, &prev);
1913 
1914                /* find a d on diagonal k */
1915                if (k==-d+ORIGIN) {
1916                         /* move down from the last d-1 on diagonal k+1 */
1917                         row = flast_d[d-1][k+1]+1;
1918                         new->link = fscript[d-1][k+1];
1919                         new->op = DEL;
1920 
1921                } else if (k==d+ORIGIN) {
1922                         /* move right from the last d-1 on diagonal k-1 */
1923                         row = flast_d[d-1][k-1];
1924                         new->link = fscript[d-1][k-1];
1925                         new->op = INS;
1926 
1927                } else if ((flast_d[d-1][k]>=flast_d[d-1][k+1]) &&
1928                           (flast_d[d-1][k]+1>=flast_d[d-1][k-1])) {
1929 
1930                         /* substitution */
1931                         row = flast_d[d-1][k]+1;
1932                         new->link = fscript[d-1][k];
1933                         new->op = SUB;
1934 
1935                } else if ((flast_d[d-1][k+1]+1>=flast_d[d-1][k-1]) &&
1936                           (flast_d[d-1][k+1]>=flast_d[d-1][k])) {
1937 
1938                         /* move left from the last d-1 on diagonal k+1 */
1939                         row = flast_d[d-1][k+1]+1;
1940                         new->link = fscript[d-1][k+1];
1941                         new->op = DEL;
1942                } else {
1943                         /* move right from the last d-1 on diagonal k-1 */
1944                         row = flast_d[d-1][k-1];
1945                         new->link = fscript[d-1][k-1];
1946                         new->op = INS;
1947 
1948                }
1949                /* code common to the three cases */
1950                new->line1 = row;
1951                new->line2 = col = row + k - ORIGIN;
1952                fscript[d][k] = new;
1953 
1954                /* slide down the diagonal */
1955                if (row>=0)
1956                while (row < m && col < n && (s1[row]==s2[col])) {
1957                                 ++row;
1958                                 ++col;
1959                }
1960                flast_d[d][k] = row;
1961 
1962                if (row == m && col == n)
1963                    if (d<=max_d) {
1964                        /* hit southeast corner; have the answer */
1965                        bno = fput_scr(fscript[d][k],offset1,offset2,m,n,data->b_offset, sgp);
1966 
1967                        **first = data->b_offset;
1968                        **last = data->b_offset+bno-1;
1969 
1970                       /*  for (Cost = 0; Cost<=m; Cost++) {
1971                             MemFree(blast_d[Cost]); MemFree(flast_d[Cost]);
1972                             MemFree(bscript[Cost]); MemFree(fscript[Cost]);
1973                        }
1974                        MemFree(min_row); MemFree(min_diag);
1975                        MemFree(max_row); MemFree(max_diag);	*/
1976 
1977 					   free_this_script (bscript, m+1);
1978 					   free_this_script (fscript, m+1);
1979 					   free_2D_Int(blast_d, m+1);
1980 					   free_2D_Int(flast_d, m+1);
1981 
1982                        MemFree(min_row); MemFree(min_diag);
1983                        MemFree(max_row); MemFree(max_diag);
1984 					   ValNodeFreeData(data_list);
1985 
1986 
1987                        *dist = d;
1988                        return bno;
1989                    } else goto fin;
1990 
1991                if (row == m)
1992                    if (d<=max_d) {
1993                        /* hit last row; don't look forward */
1994                        /* record first INS */
1995                        new = (struct edit *)MemNew(sizeof(struct edit));
1996 					   link_to_data_list((Pointer)new, &data_list, &prev);
1997                        new->line1 = m;
1998                        new->line2 = col = m + (++k) - ORIGIN;
1999                        new->op = INS;
2000                        new->link = fscript[d][k-1];
2001                        fscript[d][k] = new;
2002 
2003                        /* record last INS */
2004                        new = (struct edit *)MemNew(sizeof(struct edit));
2005 					   link_to_data_list((Pointer)new, &data_list, &prev);
2006                        new->line1 = m;
2007                        new->line2 = n;
2008                        new->op = INS;
2009                        new->link = fscript[d][k];
2010 
2011                        bno = fput_scr(new,offset1,offset2,m,n,data->b_offset, sgp);
2012                        **first = data->b_offset;
2013                        **last = data->b_offset+bno-1;
2014 
2015                        /* for (Cost = 0; Cost<=m; Cost++) {
2016                             MemFree(blast_d[Cost]); MemFree(flast_d[Cost]);
2017                             MemFree(bscript[Cost]); MemFree(fscript[Cost]);
2018                        }
2019                        MemFree(min_row); MemFree(min_diag);
2020                        MemFree(max_row); MemFree(max_diag);	*/
2021 
2022 					   free_this_script (bscript, m+1);
2023 					   free_this_script (fscript, m+1);
2024 					   free_2D_Int(blast_d, m+1);
2025 					   free_2D_Int(flast_d, m+1);
2026 
2027                        MemFree(min_row); MemFree(min_diag);
2028                        MemFree(max_row); MemFree(max_diag);
2029 					   ValNodeFreeData(data_list);
2030 
2031 
2032                        *dist = d;
2033                        return bno;
2034 
2035                    } else goto fin;
2036 
2037                if (col == n)
2038                    /* hit last column; don't look forward */
2039                    if (d<=max_d) {
2040                        /* record first DEL */
2041                        new = (struct edit *)MemNew(sizeof(struct edit));
2042 					   link_to_data_list((Pointer)new, &data_list, &prev);
2043                        new->line1 = n - (--k) + ORIGIN;
2044                        new->line2 = n;
2045                        new->op = DEL;
2046                        new->link = fscript[d][k+1];
2047                        fscript[d][k] = new;
2048 
2049                        /* record last DEL */
2050                        new = (struct edit *)MemNew(sizeof(struct edit));
2051 					   link_to_data_list((Pointer)new, &data_list, &prev);
2052                        new->line1 = m;
2053                        new->line2 = n;
2054                        new->op = DEL;
2055                        new->link = fscript[d][k];
2056 
2057                        bno = fput_scr(new,offset1,offset2,m,n,data->b_offset, sgp);
2058                        **first = data->b_offset;
2059                        **last = data->b_offset+bno-1;
2060 
2061                        /* for (Cost = 0; Cost<=m; Cost++) {
2062                             MemFree(blast_d[Cost]); MemFree(flast_d[Cost]);
2063                             MemFree(bscript[Cost]); MemFree(fscript[Cost]);
2064                        }
2065                        MemFree(min_row); MemFree(min_diag);
2066                        MemFree(max_row); MemFree(max_diag);	*/
2067 
2068 					   free_this_script (bscript, m+1);
2069 					   free_this_script (fscript, m+1);
2070 					   free_2D_Int(blast_d, m+1);
2071 					   free_2D_Int(flast_d, m+1);
2072 
2073                        MemFree(min_row); MemFree(min_diag);
2074                        MemFree(max_row); MemFree(max_diag);
2075 					   ValNodeFreeData(data_list);
2076 
2077 
2078                        *dist = d;
2079                        return bno;
2080                    } else goto fin;
2081           }     /* for k */
2082 
2083 
2084           max_row[d] = flast_d[d][ORIGIN];
2085           max_diag[d] = ORIGIN;
2086           for (k=flower; k<=fupper; ++k)
2087                  if (flast_d[d][k]>max_row[d]) {
2088                      max_row[d] = flast_d[d][k];
2089                      max_diag[d] = k;
2090                  }
2091 
2092           /* record backward and forward limits, if minimum combined
2093            * cost in overlapping. Note: it suffices to search up to
2094            * Cost=min(d,(max_d-d)).
2095            */
2096           for (Cost=0; Cost<=d; Cost++)
2097              if ((min_row[Cost]<=max_row[d]) &&
2098                  (max_d > d+Cost)) {
2099                       max_d = d+Cost;
2100                       back = Cost;
2101                       forth = d;
2102                       break;
2103              }
2104         --flower;
2105         ++fupper;
2106 
2107         ++d;  /* for d */
2108       }
2109 
2110 
2111   fin:
2112         if (m-min_row[back]>=max_row[forth]) {
2113             bno = bput_scr(bscript[back][min_diag[back]],m,n,
2114                            offset1,offset2,
2115                            min_row[back],
2116                            min_row[back]+min_diag[back]-ORIGIN,
2117                            data->b_offset, sgp);
2118             **first = (bno) ? data->b_offset+bno-1 : data->b_offset+bno;
2119             col = fput_scr(fscript[forth][max_diag[forth]],
2120                            offset1,offset2,
2121                            min_row[back],
2122                            min_row[back]+max_diag[forth]-ORIGIN,
2123                            data->b_offset+bno, sgp);
2124         } else {
2125             bno = bput_scr(bscript[back][min_diag[back]],m,n,
2126                            offset1,offset2,
2127                            max_row[forth],
2128                            max_row[forth]+min_diag[back]-ORIGIN,
2129                            data->b_offset, sgp);
2130             **first = (bno) ? data->b_offset+bno-1 : data->b_offset+bno;
2131             col = fput_scr(fscript[forth][max_diag[forth]],
2132                            offset1,offset2,
2133                            max_row[forth],
2134                            max_row[forth]+max_diag[forth]-ORIGIN,
2135                            data->b_offset+bno, sgp);
2136         }
2137         if (bno) sgp->Blist[data->b_offset].next = data->b_offset+bno;
2138         **last = data->b_offset+bno+col-1;
2139 
2140         /* for (Cost=0; Cost<=m; Cost++) {
2141                      MemFree(blast_d[Cost]); MemFree(flast_d[Cost]);
2142                      MemFree(bscript[Cost]); MemFree(fscript[Cost]);
2143         }
2144         MemFree(min_row); MemFree(min_diag);
2145         MemFree(max_row); MemFree(max_diag); */
2146 
2147 		free_this_script (bscript, m+1);
2148 		free_this_script (fscript, m+1);
2149 		free_2D_Int(blast_d, m+1);
2150 		free_2D_Int(flast_d, m+1);
2151 
2152         MemFree(min_row); MemFree(min_diag);
2153         MemFree(max_row); MemFree(max_diag);
2154 		ValNodeFreeData(data_list);
2155 
2156 
2157         *dist = back+forth;
2158         return bno+col;
2159 
2160 }
2161 
2162 
bput_scr(struct edit * start,Int4 len1,Int4 len2,Int4 offset1,Int4 offset2,Int4 row,Int4 col,Int4 offset,SIM4GlobalPtr sgp)2163 static Int4 bput_scr(struct edit *start, Int4 len1, Int4 len2,
2164                     Int4 offset1, Int4 offset2, Int4 row, Int4 col,
2165                     Int4 offset, SIM4GlobalPtr sgp)
2166 {       struct edit *eptr;
2167         Int4 bno=0, ppos1, ppos2, i;
2168 
2169         eptr = start;
2170         ppos1 = row; ppos2 = col-1;
2171 
2172         while (eptr != NULL) {
2173            if (ppos1>eptr->line1)        /* not used in this version */
2174                      eptr = eptr->link;
2175            else
2176            switch (eptr->op) {
2177                 case DEL     :
2178                 case SUB :
2179                      eptr = eptr->link;
2180                      break;
2181                 case INS     :
2182                      if (ppos1==eptr->line1) {
2183                          ppos2 = eptr->line2;
2184                      } else {
2185                          sgp->Blist[bno+offset].init_pos1 = ppos1+offset1+1;
2186                          sgp->Blist[bno+offset].end_pos1 = eptr->line1-1+offset1+1;
2187                          sgp->Blist[bno+offset].init_pos2 = ppos2+1+offset2+1;
2188                          sgp->Blist[bno+offset].end_pos2 = eptr->line2-1+offset2+1;
2189                          ppos1 = eptr->line1;
2190                          ppos2 = eptr->line2;
2191                          bno++;
2192                      }
2193                      eptr=eptr->link;
2194            }
2195         }
2196         if ((ppos1!=len1) || (ppos2+1!=len2)) {
2197 
2198                   sgp->Blist[bno+offset].init_pos1 = ppos1+offset1+1;
2199                   sgp->Blist[bno+offset].end_pos1 = len1-1+offset1+1;
2200                   sgp->Blist[bno+offset].init_pos2 = ppos2+1+offset2+1;
2201                   sgp->Blist[bno+offset].end_pos2 = len2-1+offset2+1;
2202                   bno++;
2203         }
2204         for (i=offset; i<bno+offset; i++)
2205                   sgp->Blist[i+1].next = i;
2206 
2207         return bno;
2208 }
2209 
fput_scr(struct edit * start,Int4 offset1,Int4 offset2,Int4 row,Int4 col,Int4 offset,SIM4GlobalPtr sgp)2210 static Int4 fput_scr(struct edit *start, Int4 offset1, Int4 offset2,
2211                     Int4 row, Int4 col, Int4 offset, SIM4GlobalPtr sgp)
2212 {       struct edit *eptr;
2213         Int4 bno=0, ppos1, ppos2, i;
2214 
2215         eptr = start;
2216         ppos1 = row; ppos2 = col+1;
2217 
2218         while (eptr != NULL) {
2219            if (ppos1 < eptr->line1)        /* not used in this version */
2220                      eptr = eptr->link;
2221            else
2222            switch (eptr->op) {
2223                 case DEL     :
2224                 case SUB :
2225                      eptr = eptr->link;
2226                      break;
2227                 case INS     :
2228                      if (ppos1==eptr->line1) {
2229                          ppos2 = eptr->line2;
2230                      } else {
2231                          sgp->Blist[bno+offset].init_pos1 = eptr->line1+1+offset1;
2232                          sgp->Blist[bno+offset].end_pos1 = ppos1+offset1;
2233                          sgp->Blist[bno+offset].init_pos2 = eptr->line2+1+offset2;
2234                          sgp->Blist[bno+offset].end_pos2 = ppos2-1+offset2;
2235                          ppos1 = eptr->line1;
2236                          ppos2 = eptr->line2;
2237                          bno++;
2238                      }
2239                      eptr=eptr->link;
2240            }
2241         }
2242         if (ppos1!=0) {  /* not a final insert */
2243                   sgp->Blist[bno+offset].init_pos1 = 1+offset1;
2244                   sgp->Blist[bno+offset].end_pos1 = ppos1+offset1;
2245                   sgp->Blist[bno+offset].init_pos2 = 1+offset2;
2246                   sgp->Blist[bno+offset].end_pos2 = ppos2-1+offset2;
2247                   bno++;
2248         }
2249 
2250         for (i=offset; i<bno+offset; i++)
2251            sgp->Blist[i].next = i+1;
2252 
2253         return bno;
2254 }
2255 
path(Int4 i1,Int4 j1,Int1 type1,Int4 i2,Int4 j2,Int1 type2,Int4 dist,EditScriptPtr PNTR head,EditScriptPtr PNTR tail,data_t * data)2256 static void path(Int4 i1, Int4 j1, Int1 type1, Int4 i2, Int4 j2, Int1 type2, Int4 dist,
2257 		 EditScriptPtr PNTR head, EditScriptPtr PNTR tail, data_t *data)
2258 {
2259   Int4Ptr SS, DD, II;       /* Forward vectors */
2260   Int4Ptr RS, RD, RI;       /* Backward vectors */
2261 
2262   EditScriptPtr head1, tail1, head2, tail2;
2263   Int4 midc, rmidc;
2264   Int4 start, lower, upper;
2265   Int4 rstart, rlower, rupper;
2266   Int4 c, k, t1, t2, t;
2267   Int4 maxint;
2268   Int4 mi, mj, mtype;
2269   Int1 flag;
2270 
2271   /* Boundary cases */
2272   if (i1 == i2) {
2273     if (j1 == j2) *head = NULL;
2274     else {
2275       head1 = (EditScriptPtr) MemNew(sizeof(EditScript));
2276       head1->op_type = INS;
2277       head1->num = j2-j1;
2278       head1->next = NULL;
2279       *head = *tail = head1;
2280     }
2281     return;
2282   }
2283 
2284   if (j1 == j2) {
2285     head1 = (EditScriptPtr) MemNew(sizeof(EditScript));
2286     head1->op_type = DEL;
2287     head1->num = i2-i1;
2288     head1->next = NULL;
2289     *head = *tail = head1;
2290     return;
2291   }
2292 
2293   if (dist <= 1) {
2294     if (j2-i2 == j1-i1) {
2295       head1 = (EditScriptPtr) MemNew(sizeof(EditScript));
2296       head1->op_type = SUB;
2297       head1->num = i2-i1;
2298       head1->next = NULL;
2299       *head = *tail = head1;
2300     } else if (j2-i2 > j1-i1) {
2301       if (type1 == INS) {
2302 	head1 = (EditScriptPtr) MemNew(sizeof(EditScript));
2303 	head1->op_type = INS;
2304 	head1->num = 1;
2305 	head2 = (EditScriptPtr) MemNew(sizeof(EditScript));
2306 	head2->op_type = SUB;
2307 	head2->num = i2-i1;
2308       } else {
2309 	head1 = (EditScriptPtr) MemNew(sizeof(EditScript));
2310 	head1->op_type = SUB;
2311 	head1->num = i2-i1;
2312 	head2 = (EditScriptPtr) MemNew(sizeof(EditScript));
2313 	head2->op_type = INS;
2314 	head2->num = 1;
2315       }
2316       head1->next = head2;
2317       head2->next = NULL;
2318       *head = head1;
2319       *tail = head2;
2320     } else if (j2-i2 < j1-i1) {
2321       if (type1 == DEL) {
2322 	head1 = (EditScriptPtr) MemNew(sizeof(EditScript));
2323 	head1->op_type = DEL;
2324 	head1->num = 1;
2325 	head2 = (EditScriptPtr) MemNew(sizeof(EditScript));
2326 	head2->op_type = SUB;
2327 	head2->num = j2-j1;
2328       } else {
2329 	head1 = (EditScriptPtr) MemNew(sizeof(EditScript));
2330 	head1->op_type = SUB;
2331 	head1->num = j2-j1;
2332 	head2 = (EditScriptPtr) MemNew(sizeof(EditScript));
2333 	head2->op_type = DEL;
2334 	head2->num = 1;
2335       }
2336       head1->next = head2;
2337       head2->next = NULL;
2338       *head = head1;
2339       *tail = head2;
2340     }
2341     return;
2342   }
2343 
2344   /* Divide the problem at the middle cost */
2345   midc = dist/2;
2346   rmidc = dist - midc;
2347 
2348   /* Compute the boundary diagonals */
2349   start = j1 - i1;
2350   lower = MAX(j1-i2, start-midc);
2351   upper = MIN(j2-i1, start+midc);
2352   rstart = j2-i2;
2353   rlower = MAX(j1-i2, rstart-rmidc);
2354   rupper = MIN(j2-i1, rstart+rmidc);
2355 
2356   /* Allocate space for forward vectors */
2357   SS = (Int4Ptr )MemNew((upper-lower+1)*sizeof(Int4)) - lower;
2358   DD = (Int4Ptr )MemNew((upper-lower+2)*sizeof(Int4)) - lower;
2359   II = (Int4Ptr )MemNew((upper-lower+2)*sizeof(Int4)) - lower + 1;
2360 
2361   /* Forward computation */
2362   for (k=lower; k<=upper; ++k) SS[k] = MININT;
2363   for (k=lower; k<=upper+1; ++k) DD[k] = MININT;
2364   for (k=lower-1; k<=upper; ++k) II[k] = MININT;
2365   if (type1 == SUB) SS[start] = snake(start, i1, i2, j2, data);
2366   else if (type1 == DEL) {
2367     DD[start] = i1;
2368     SS[start] = snake(start, i1, i2, j2, data);
2369   } else {
2370     II[start] = i1;
2371     SS[start] = snake(start, i1, i2, j2, data);
2372   }
2373 
2374   for (c = 1; c <= midc; ++c) {
2375     t = MAX(lower, start-c);
2376     t1 = II[t-1];
2377     for (k = t; k <= MIN(upper, start + c); ++k) {
2378       t2 = II[k];
2379       II[k] = MAX(t1, SS[k]);
2380       t1 = t2;
2381       DD[k] = MAX(DD[k + 1] + 1, SS[k]);
2382       SS[k] = snake(k, MIN(j2 - k, MAX(MAX(SS[k] + 1, II[k]), DD[k])), i2, j2, data);
2383     }
2384   }
2385 
2386   /* Allocate space for backward vectors */
2387   RS = (Int4Ptr )MemNew((rupper-rlower+1)*sizeof(Int4)) - rlower;
2388   RD = (Int4Ptr )MemNew((rupper-rlower+2)*sizeof(Int4)) - rlower + 1;
2389   RI = (Int4Ptr )MemNew((rupper-rlower+2)*sizeof(Int4)) - rlower;
2390 
2391   /* Backward computation */
2392   maxint = i2 + dist + data->N;
2393   for(k=rlower; k<=rupper; ++k) RS[k] = maxint;
2394   for(k=rlower-1; k<=rupper; ++k) RD[k] = maxint;
2395   for(k=rlower; k<=rupper+1; ++k) RI[k] = maxint;
2396   if(type2 == SUB)
2397     RI[rstart] = RD[rstart] = RS[rstart] = rsnake(rstart, i2, i1, j1, data);
2398   else if (type2 == DEL) RD[rstart] = i2;
2399   else RI[rstart] = i2;
2400 
2401   for(c = 1; c <= rmidc; ++c) {
2402     t = MAX(rlower, rstart - c);
2403     t1 = RD[t - 1];
2404     for(k = t; k <= MIN(rupper, rstart + c); ++k) {
2405       RS[k] = rsnake(k, MAX(j1 - k, MIN(MIN(RS[k] - 1, RD[k]), RI[k])), i1, j1, data);
2406       t2 = RD[k];
2407       RD[k] = MIN(t1 - 1, RS[k]);
2408       t1 = t2;
2409       RI[k] = MIN(RI[k + 1], RS[k]);
2410     }
2411   }
2412 
2413   /* Find (mi, mj, mtype) such that
2414      the distance from (i1, j1, type1) to (mi, mj, mtype) is midc
2415      and the distance from (mi, mj, mtype) to (i2, j2, type2) is rmidc.
2416      */
2417 
2418   flag = FALSE;
2419   for (k=MAX(lower,rlower); k<=MIN(upper,rupper);++k) {
2420 
2421     if (SS[k]>=RS[k] || DD[k]>=RD[k] || II[k]>=RI[k]) {
2422       if (DD[k]>=RD[k]) {
2423 	mi = DD[k];
2424 	mj = k+mi;
2425 	mtype = DEL;
2426       } else if (II[k] >= RI[k]) {
2427 	mi = II[k];
2428 	mj = k+mi;
2429 	mtype = INS;
2430       } else {
2431 	mi = SS[k];
2432 	mj = k+mi;
2433 	mtype = SUB;
2434       }
2435 
2436       flag = TRUE;
2437       break;
2438     }
2439   }
2440 
2441   /* Free working vectors */
2442   MemFree(SS+lower);
2443   MemFree(DD+lower);
2444   MemFree(II+lower-1);
2445   MemFree(RS+rlower);
2446   MemFree(RD+rlower-1);
2447   MemFree(RI+rlower);
2448 
2449   if (flag) {
2450     /* Find a path from (i1,j1,type1) to (mi,mj,mtype) */
2451     path(i1, j1, type1, mi, mj, mtype, midc, &head1, &tail1, data);
2452 
2453     /* Find a path from (mi,mj,mtype) to (i2,j2,type2) */
2454     path(mi, mj, mtype, i2, j2, type2, rmidc, &head2, &tail2, data);
2455 
2456     /* Join these two paths together */
2457     if(head1) tail1->next = head2;
2458     else head1 = head2;
2459   }
2460   else {
2461     ErrPostEx(SEV_WARNING, 0, 0, "Something wrong when dividing");
2462 
2463     /* printf("Something wrong when dividing\n"); */
2464     head1 = NULL;
2465   }
2466   *head = head1;
2467   if (head2) *tail = tail2;
2468   else *tail = tail1;
2469 }
2470 
2471 /* Condense_script - merge contiguous operations of the same type together */
Condense_script(EditScriptPtr head)2472 static void Condense_script(EditScriptPtr head)
2473 {
2474         EditScriptPtr tp, tp1;
2475 
2476         tp = head;
2477         while (tp != NULL) {
2478            while (((tp1 = tp->next) != NULL) && (tp->op_type == tp1->op_type)) {
2479                  tp->num = tp->num + tp1->num;
2480                  tp->next = tp1->next;
2481                  MemFree(tp1);
2482            }
2483            tp = tp->next;
2484         }
2485 }
2486 
Reverse_Script(EditScriptPtr head)2487 static void Reverse_Script(EditScriptPtr head)
2488 {
2489         EditScriptPtr tp;
2490 
2491         tp = head;
2492         while (tp != NULL) {
2493            if (tp->op_type == INS) tp->op_type = DEL;
2494            else if (tp->op_type == DEL) tp->op_type = INS;
2495            tp = tp->next;
2496         }
2497 }
2498 
Print_Script(EditScriptPtr head,Int4 M,Int4 N,data_t * data)2499 static void Print_Script(EditScriptPtr head, Int4 M, Int4 N, data_t *data)
2500 {
2501   EditScriptPtr tp;
2502   Int4 i, j, k, count;
2503 
2504   i = j = 0;
2505   tp = head;
2506 
2507   while(tp) {
2508     if(tp->op_type == SUB) {
2509       k = 0;
2510       while(k < tp->num) {
2511 	count = 0;
2512 	while((data->seq1[i] == data->seq2[j]) && (k < tp->num)) {
2513 	  ++i;
2514 	  ++j;
2515 	  ++count;
2516 	  ++k;
2517 	}
2518 	if(count > 0) fprintf(stdout, "copy %d\n", count);
2519 	if(k < tp->num) {
2520 	  fprintf(stdout, "replace %c by %c\n", data->seq1[i], data->seq2[j]);
2521 	  ++i;
2522 	  ++j;
2523 	  ++k;
2524 	}
2525       }
2526     }
2527     else if (tp->op_type == INS) {
2528       if((tp == head || tp->next == NULL) && (M <= N))
2529 	fprintf(stdout, "skip (second sequence) %d\n", tp->num);
2530       else {
2531 	/*
2532 	  printf("insert %d\n", tp->num);
2533 	  */
2534 	fprintf(stdout, "insert ");
2535 	if(tp->num <= 10)
2536 	  for(k = j; k < j + tp->num; ++k) fprintf(stdout, "%c", data->seq2[k]);
2537 	else fprintf(stdout, " %d ", tp->num);
2538 	fprintf(stdout, "\n");
2539       }
2540       j += tp->num;
2541     }
2542     else {     /* DEL */
2543       if((tp == head || tp->next == NULL) && (M > N))
2544 	fprintf(stdout, "skip (first sequence) %d\n", tp->num);
2545       else {
2546 	/*
2547 	  printf("delete %d\n", tp->num);
2548 	  */
2549 	fprintf(stdout, "delete ");
2550 	if(tp->num <= 10)
2551 	  for(k=i; k<i+tp->num; ++k)
2552 	    fprintf(stdout, "%c", data->seq1[k]);
2553 	else
2554 	  fprintf(stdout, "%d ", tp->num);
2555 	fprintf(stdout, "\n");
2556       }
2557       i += tp->num;
2558     }
2559     tp = tp->next;
2560   }
2561 }
2562 
S2A(EditScriptPtr head,Int4Ptr S)2563 static void S2A(EditScriptPtr head, Int4Ptr S)
2564 {
2565         EditScriptPtr tp;
2566         Int4Ptr lastS;
2567 	Int4 i;
2568 
2569         tp = head;
2570         lastS = S;
2571         while (tp != NULL) {
2572 /*
2573         printf("tp->op_type=%d, tp->num=%d\n",tp->op_type, tp->num);
2574 */
2575            if (tp->op_type == SUB) {
2576                 for (i=0; i<tp->num; ++i) *lastS++ = 0;
2577            } else if (tp->op_type == INS) {
2578                 *lastS++ = tp->num;
2579            } else {     /* DEL */
2580                 *lastS++ = 0 - tp->num;
2581            }
2582            tp = tp->next;
2583         }
2584 }
2585 
2586 
2587 
2588 /* Alignment display routine */
2589 
2590 static Int1 ALINE[51], BLINE[51], CLINE[51];
2591 
DISPLAY(Int1 A[],Int1 B[],Int4 M,Int4 N,Int4 S[],Int4 AP,Int4 BP)2592 static void DISPLAY(Int1 A[], Int1 B[], Int4 M, Int4 N, Int4 S[], Int4 AP, Int4 BP)
2593 { register Int1 *a, *b, *c;
2594   register Int4   i,  j, op;
2595            Int4   lines, ap, bp;
2596 
2597   i = j = op = lines = 0;
2598   ap = AP;
2599   bp = BP;
2600   a = ALINE;
2601   b = BLINE;
2602   c = CLINE;
2603   while (i < M || j < N)
2604     { if (op == 0 && *S == 0)
2605         { op = *S++;
2606           *a = A[++i];
2607           *b = B[++j];
2608           *c++ = (*a++ == *b++) ? '|' : ' ';
2609         }
2610       else
2611         { if (op == 0)
2612             op = *S++;
2613           if (op > 0)
2614             { *a++ = ' ';
2615               *b++ = B[++j];
2616               op--;
2617             }
2618           else
2619             { *a++ = A[++i];
2620               *b++ = ' ';
2621               op++;
2622             }
2623           *c++ = '-';
2624         }
2625       if (a >= ALINE+50 || i >= M && j >= N)
2626         { *a = *b = *c = '\0';
2627           fprintf(stdout, "\n%6ld ",(long int)50*lines++);
2628           for (b = ALINE+10; b <= a; b += 10)
2629             fprintf(stdout, "    .    :");
2630           if (b <= a+5)
2631             fprintf(stdout, "    .");
2632           fprintf(stdout, "\n%6d %s\n       %s\n%6d %s\n",ap,ALINE,CLINE,bp,BLINE);
2633           ap = AP + i;
2634           bp = BP + j;
2635           a = ALINE;
2636           b = BLINE;
2637           c = CLINE;
2638         }
2639     }
2640 }
2641 
2642 
2643 
snake(Int4 k,Int4 x,Int4 endx,Int4 endy,data_t * data)2644 static Int4 snake(Int4 k, Int4 x, Int4 endx, Int4 endy, data_t *data)
2645 {
2646   Int4 y;
2647 
2648   if(x < 0) return x;
2649   y = x + k;
2650 
2651   while (x < endx && y < endy && data->seq1[x] == data->seq2[y]) {
2652     ++x; ++y;
2653   }
2654 
2655   return x;
2656 }
2657 
2658 
rsnake(Int4 k,Int4 x,Int4 startx,Int4 starty,data_t * data)2659 static Int4 rsnake(Int4 k, Int4 x, Int4 startx, Int4 starty, data_t *data)
2660 {
2661   Int4 y;
2662 
2663   if(x > data->M) return x;
2664   y = x + k;
2665   while(x > startx && y > starty && data->seq1[x - 1] == data->seq2[y - 1]) {
2666     --x; --y;
2667   }
2668 
2669   return x;
2670 }
2671 
get_dist(Int4 i1,Int4 j1,Int4 i2,Int4 j2,data_t * data)2672 static Int4 get_dist(Int4 i1, Int4 j1, Int4 i2, Int4 j2, data_t *data)
2673 {
2674   Int4Ptr SS, DD, II;
2675   Int4 goal_diag;
2676   Int4 c, k, t1, t2, t;
2677   Int4 start, lower, upper;
2678 
2679   /* Compute the boundary diagonals */
2680   /* printf("i1 = %ld, j1 = %ld, i2= %ld, j2=%ld, limit = %ld", i1, j1, i2, j2, data->limit); */
2681   start = j1 - i1;
2682   lower = MAX(j1 - i2, start - data->limit);
2683   upper = MIN(j2 - i1, start + data->limit);
2684   goal_diag = j2 - i2;
2685 
2686   if(goal_diag > upper || goal_diag < lower) {
2687 	/* ErrPostEx(SEV_WARNING, 0, 0, "The two sequences are not really similar"); */
2688 	return -1;
2689 
2690     /* printf("The two sequences are not really similar.\n");
2691     printf("Please try an exact aligning method.\n");
2692     exit(1); */
2693   }
2694 
2695   /* Allocate space for forward vectors */
2696   SS = (Int4Ptr)MemNew((upper - lower + 1) * sizeof(Int4)) - lower;
2697   DD = (Int4Ptr)MemNew((upper - lower + 2) * sizeof(Int4)) - lower;
2698   II = (Int4Ptr)MemNew((upper - lower + 2) * sizeof(Int4)) - lower + 1;
2699 
2700   /* Initialization */
2701   for(k = lower; k <= upper; ++k) SS[k] = MININT;
2702   for(k = lower; k <= upper + 1; ++k) DD[k] = MININT;
2703   for(k = lower - 1; k <= upper; ++k) II[k] = MININT;
2704   SS[start] = snake(start, i1, i2, j2, data);
2705 
2706   if(SS[goal_diag] >= i2) {
2707     /*
2708       #ifdef STATS
2709       printf("get_dist = %d\n", 0);
2710       #endif
2711       */
2712 
2713     /* Free working vectors */
2714     MemFree(SS+lower);
2715     MemFree(DD+lower);
2716     MemFree(II+lower-1);
2717     return 0;
2718   }
2719 
2720   for(c = 1; c <= data->limit; ++c) {
2721     t = MAX(lower, start-c);
2722     t1 = II[t - 1];
2723     for(k = t; k <= MIN(upper, start + c); ++k) {
2724       t2 = II[k];
2725       II[k] = MAX(t1, SS[k]);
2726       t1 = t2;
2727       DD[k] = MAX(DD[k + 1] + 1, SS[k]);
2728       SS[k] = snake(k, MIN(j2 - k,MAX(MAX(SS[k] + 1, II[k]), DD[k])), i2, j2, data);
2729     }
2730 
2731     if (SS[goal_diag] >= i2) {
2732 #ifdef STATS
2733       printf("get_dist = %d\n",c);
2734 #endif
2735 
2736       /* Free working vectors */
2737       MemFree(SS + lower);
2738       MemFree(DD + lower);
2739       MemFree(II + lower - 1);
2740       return c;
2741     }
2742   }
2743 
2744   /* Ran out of distance limit */
2745   /* ErrPostEx(SEV_WARNING, 0, 0, "The two sequences are not really similar"); */
2746   return -1;
2747   /* printf("The two sequences are not really similar.\n");
2748   printf("Please try an exact method.\n");
2749   exit(1); */
2750 }
2751 
2752 
locate(Int4Ptr i1,Int4Ptr j1,Int4Ptr i2,Int4Ptr j2,Int1 * A,Int1 * B,Int4 m,Int4 n,Int4 W1,data_t * data)2753 static Int1 locate(Int4Ptr i1, Int4Ptr j1, Int4Ptr i2, Int4Ptr j2,
2754                    Int1 *A, Int1 *B, Int4 m, Int4 n, Int4 W1,
2755 		   data_t *data)
2756 {
2757   Int4 mask;
2758   Int4 BUCKETS;
2759   Int1Ptr bucket;
2760   Int4 i, j;
2761   Int1Ptr p, q;
2762   Int4 ecode, ecode1;
2763   Int4 fcount, bcount, fmax, bmax;
2764   float trust_ratio;
2765   Int4 paired_fj, paired_bj, best_range, t;
2766   ValNodePtr prev = NULL, data_list = NULL;
2767 
2768   typedef struct max_chain {
2769     Int4 j;  /* j position */
2770     struct max_chain *next;
2771   } max_chain;
2772   max_chain *fj, *bj, *tj, *tj1, *tj2;
2773 
2774   if(m < W1 + NW - 1) {
2775     /*              printf("m=%d, The sequences are too short for the heuristic method.\n",m);
2776      */
2777     return FALSE;
2778   }
2779 
2780   /* Set up the hash table (derived from Sequence A) */
2781 
2782   mask = (1 << (W1 + W1 - 2)) - 1;
2783   BUCKETS = power2(2 * W1);
2784   bucket = (Int1Ptr) MemNew(BUCKETS * sizeof(Int1));
2785   for (i = 0; i<BUCKETS; ++i) bucket[i] = 0;
2786 
2787   /* front end */
2788   p = A;
2789 
2790   ecode = 0L;
2791   for(i = 1; i < W1; ++i) {
2792     ecode = (ecode << 2) + data->encoding[*p++];
2793   }
2794 
2795   for(i = 0; i < NW; ++i) {
2796     ecode = ((ecode & mask) << 2) + data->encoding[*p++];
2797     bucket[ecode] = 1;
2798   }
2799   /* back end */
2800   p = A + m - W1 - NW + 1;
2801   ecode = 0L;
2802   for(i = 1; i < W1; ++i)
2803     ecode = (ecode << 2) + data->encoding[*p++];
2804   for(i = 0; i < NW; ++i) {
2805     ecode = ((ecode & mask) << 2) + data->encoding[*p++];
2806     if (bucket[ecode] == 1 || bucket[ecode] == 3) bucket[ecode] = 3;
2807     else bucket[ecode] = 2;
2808   }
2809 
2810   /* Locate the interval with the maximum number of the matched 8-tuples */
2811   trust_ratio = 0.6;
2812   fmax = bmax = NW * trust_ratio;
2813   fj = bj = NULL;
2814   fcount = bcount = 0;
2815   p = B;
2816   ecode = 0L;
2817   for(j = 1; j < W1; ++j)
2818     ecode = (ecode << 2) + data->encoding[*p++];
2819   ecode1 = ecode;
2820   q = p;
2821   for(j = 0; j < NW; ++j) {
2822     ecode = ((ecode & mask) << 2) + data->encoding[*p++];
2823     if(bucket[ecode] == 1 || bucket[ecode] == 3) ++fcount;
2824     if(bucket[ecode] == 2 || bucket[ecode] == 3) ++bcount;
2825   }
2826 
2827   if (fcount >= fmax) {
2828     fj = (max_chain *) MemNew(sizeof(max_chain));
2829     link_to_data_list((Pointer)fj, &data_list, &prev);
2830     fj->j = 0;
2831     fj->next = NULL;
2832     fmax = fcount;
2833   }
2834   if (bcount >= bmax) {
2835     bj = (max_chain *) MemNew(sizeof(max_chain));
2836     link_to_data_list((Pointer)bj, &data_list, &prev);
2837     bj->j = W1 + NW - 2;
2838     bj->next = NULL;
2839     bmax = bcount;
2840   }
2841 
2842   for(j = 1; j <= n - W1 - NW + 1; ++j) {
2843     ecode1 = ((ecode1 & mask) << 2) + data->encoding[*q++];
2844     if (bucket[ecode1] == 1 || bucket[ecode1] == 3) --fcount;
2845     if (bucket[ecode1] == 2 || bucket[ecode1] == 3) --bcount;
2846     ecode = ((ecode & mask) << 2) + data->encoding[*p++];
2847     if (bucket[ecode] == 1 || bucket[ecode] == 3) ++fcount;
2848     if (bucket[ecode] == 2 || bucket[ecode] == 3) ++bcount;
2849 
2850     if(fcount > fmax) {
2851       while(fj != NULL) {
2852 	tj = fj->next;
2853 	/* MemFree(fj); */
2854 	fj = tj;
2855       }
2856       fj = (max_chain *) MemNew(sizeof(max_chain));
2857       link_to_data_list((Pointer)fj, &data_list, &prev);
2858       fj->j = j;
2859       fj->next = NULL;
2860       fmax = fcount;
2861     }
2862     else if(fcount == fmax) {
2863       tj = (max_chain *) MemNew(sizeof(max_chain));
2864       link_to_data_list((Pointer)tj, &data_list, &prev);
2865       tj->j = j;
2866       tj->next = fj;
2867       fj = tj;
2868     }
2869     if(bcount > bmax) {
2870       while(bj != NULL) {
2871 	tj = bj->next;
2872 	/* MemFree(bj); */
2873 	bj = tj;
2874       }
2875       bj = (max_chain *) MemNew(sizeof(max_chain));
2876       link_to_data_list((Pointer)bj, &data_list, &prev);
2877       bj->j = j + W1 + NW - 2;
2878       bj->next = NULL;
2879       bmax = bcount;
2880     }
2881     else if(bcount == bmax) {
2882       tj = (max_chain *) MemNew(sizeof(max_chain));
2883       link_to_data_list((Pointer)tj, &data_list, &prev);
2884       tj->j = j + W1 + NW - 2;
2885       tj->next = bj;
2886       bj = tj;
2887     }
2888   }
2889 
2890   MemFree(bucket);
2891 
2892   if(fj == NULL || bj == NULL)
2893   {
2894      if(data_list != NULL)
2895          ValNodeFreeData(data_list);
2896       return FALSE;
2897   }
2898 
2899   best_range = (Int4)(PERCENT * m) + 1;
2900   for(tj1 = fj; tj1 != NULL; tj1 = tj1->next) {
2901     for(tj2 = bj; tj2 != NULL; tj2 = tj2->next) {
2902       t = tj2->j - tj1->j + 1 - m;
2903       if(t < 0) t = -t;
2904 
2905       if(t < best_range) {
2906 	best_range = t;
2907 	paired_fj = tj1->j;
2908 	paired_bj = tj2->j;
2909       }
2910     }
2911   }
2912 
2913   ValNodeFreeData(data_list);
2914   if(best_range > (Int4)(PERCENT * m)) return FALSE;
2915 
2916   *i1 = 0;
2917   *i2 = m;
2918   *j1 = paired_fj;
2919   *j2 = paired_bj + 1;
2920 
2921   return TRUE;
2922 }
2923