1 static char const rcsid[] = "$Id: gapxdrop.c,v 6.76 2003/11/07 21:52:52 dondosha Exp $";
2 
3 /* ===========================================================================
4 *
5 *                            PUBLIC DOMAIN NOTICE
6 *               National Center for Biotechnology Information
7 *
8 *  This software/database is a "United States Government Work" under the
9 *  terms of the United States Copyright Act.  It was written as part of
10 *  the author's official duties as a United States Government employee and
11 *  thus cannot be copyrighted.  This software/database is freely available
12 *  to the public for use. The National Library of Medicine and the U.S.
13 *  Government have not placed any restriction on its use or reproduction.
14 *
15 *  Although all reasonable efforts have been taken to ensure the accuracy
16 *  and reliability of the software and data, the NLM and the U.S.
17 *  Government do not and cannot warrant the performance or results that
18 *  may be obtained by using this software or data. The NLM and the U.S.
19 *  Government disclaim all warranties, express or implied, including
20 *  warranties of performance, merchantability or fitness for any particular
21 *  purpose.
22 *
23 *  Please cite the author in any work or product based on this material.
24 *
25 * ===========================================================================*/
26 
27 /*****************************************************************************
28 
29 File name: gapxdrop.c
30 
31 Author: Gennadiy Savchuk, Jinqhui Zhang, Tom Madden
32 [From code originally supplied by Webb Miller's lab].
33 
34 Contents: Functions to perform a gapped alignment on two sequences.
35 
36 ****************************************************************************/
37 /* $Revision: 6.76 $
38 * $Log: gapxdrop.c,v $
39 * Revision 6.76  2003/11/07 21:52:52  dondosha
40 * Fixed SEMI_G_ALIGN_EX so it produces correct scores, by making code exactly correspond to ALIGN_EX
41 *
42 * Revision 6.75  2003/08/20 22:11:49  dondosha
43 * Removed some garbage code
44 *
45 * Revision 6.74  2003/06/23 21:09:23  dondosha
46 * Allow left extension from offset 0, since the starting point must be part of the left extension
47 *
48 * Revision 6.73  2003/05/30 17:25:36  coulouri
49 * add rcsid
50 *
51 * Revision 6.72  2003/05/27 20:29:57  dondosha
52 * Fix for OOF alignments starting at offsets 1 or 2 in query
53 *
54 * Revision 6.71  2003/05/15 21:50:08  dondosha
55 * The break in previous change was put in a wrong place; fixed
56 *
57 * Revision 6.70  2003/05/06 18:56:12  dondosha
58 * In ALIGN_packed_nucl: break out of loop if matrix score is MININT, to avoid gapping through sentinel bytes
59 *
60 * Revision 6.69  2003/04/22 17:50:07  dondosha
61 * Correction in OOFGapXEditBlockToSeqAlign for case when gap is followed by a frame shift
62 *
63 * Revision 6.68  2002/08/22 12:26:42  madden
64 * Removed unused variables
65 *
66 * Revision 6.67  2002/07/24 19:14:42  dondosha
67 * Previous fix was incomplete; correcting
68 *
69 * Revision 6.66  2002/07/02 17:08:48  dondosha
70 * If alignment longer than traceback, set traceback remainder to 0
71 *
72 * Revision 6.65  2002/06/11 14:44:48  dondosha
73 * Return status from some functions instead of search block pointer
74 *
75 * Revision 6.64  2002/05/09 17:01:23  dondosha
76 * Renamed typedefs dp_ptr and dp_node to GapXDPPtr and GapXDP
77 *
78 * Revision 6.63  2002/05/08 22:48:26  dondosha
79 * Allocate memory for dynamic programming upfront in Mega BLAST case
80 *
81 * Revision 6.62  2002/03/05 17:53:41  dondosha
82 * Added one sanity check in GapXEditBlockToSeqAlign
83 *
84 * Revision 6.61  2001/08/27 18:56:26  madden
85 * Fix OOFGapXEditBlockToSeqAlign if alignment overruns end of query
86 *
87 * Revision 6.60  2001/07/23 13:47:17  dondosha
88 * Added LIBCALL to GapXEditScriptDelete declaration
89 *
90 * Revision 6.59  2001/07/19 22:13:31  dondosha
91 * Made GapXEditScriptDelete public for use in mblast.c
92 *
93 * Revision 6.58  2001/04/25 13:25:47  madden
94 * Do not memset tback
95 *
96 * Revision 6.57  2000/12/29 20:45:38  madden
97 * Fix for ALIGN_packed_nucl
98 *
99 * Revision 6.56  2000/11/30 21:37:39  madden
100 * Roll back before GapXDropSetAlignMask changes
101 *
102 * Revision 6.55  2000/11/29 21:01:39  madden
103 * Use shorter length for dp_ptr rather than value from X
104 *
105 * Revision 6.54  2000/11/15 15:16:41  madden
106 * Changes to recycle dynamic programming structure
107 *
108 * Revision 6.53  2000/11/09 15:15:52  shavirin
109 * Removed #include <txalign.h>
110 *
111 * Revision 6.52  2000/10/13 21:23:48  shavirin
112 * Fixed OOF problem with truncated alignments.
113 *
114 * Revision 6.51  2000/10/04 20:43:36  shavirin
115 * Fixed case with many multiple frame shifts - protein end corrected
116 * in SeqAlign.
117 *
118 * Revision 6.50  2000/10/04 18:06:40  dondosha
119 * Subtract decline_penalty from initial values of dyn_prog[i].FF
120 *
121 * Revision 6.49  2000/09/07 16:24:27  shavirin
122 * Fixed minor memory leak.
123 *
124 * Revision 6.48  2000/08/30 14:15:16  shavirin
125 * Empty log message.
126 *
127 * Revision 6.47  2000/08/30 13:56:58  shavirin
128 * Removed (ifdef-ed out) OOF debug printout.
129 *
130 * Revision 6.46  2000/08/29 22:09:04  shavirin
131 * Next update to OOF API. Fixed few cases.
132 *
133 * Revision 6.45  2000/08/25 19:00:45  shavirin
134 * Added adjustment to the order of unaligned regions and gaps to ensure,
135 * that unaligned regions will be always from the left from gap if adjacent
136 *
137 * Revision 6.44  2000/08/08 21:44:25  shavirin
138 * Enabled possibility to create discontinuous alignment.
139 *
140 * Revision 6.43  2000/07/26 21:02:26  shavirin
141 * Removed debug printing of OOF alignment.
142 *
143 * Revision 6.42  2000/07/26 20:33:13  kans
144 * included txalign.h
145 *
146 * Revision 6.41  2000/07/25 16:48:07  shavirin
147 * Changed function to create ASN.1 for results of OOF search.
148 *
149 * Revision 6.40  2000/07/18 22:36:01  shavirin
150 * Fixed ABR, ABW errors or purify in PerformGapped..() functions.
151 *
152 * Revision 6.39  2000/07/17 15:24:21  shavirin
153 * Added parameter to function OOFGapXEditBlockToSeqAlign()
154 *
155 * Revision 6.37  2000/07/13 15:10:55  shavirin
156 * Changed SEMI_G_ALIGN to OOF_SEMI_G_ALIGN for is_ooframe == TRUE.
157 *
158 * Revision 6.36  2000/07/12 23:07:31  kans
159 * reverse_seq moved from pseed3.c to blastool.c, placed in blast.h header, called by gapxdrop.c
160 *
161 * Revision 6.35  2000/07/12 14:48:14  kans
162 * added prototype for reverse_seq, copied from pseed3.c - unable to easily include seed.h due to cascading dependency errors
163 *
164 * Revision 6.34  2000/07/11 20:56:08  shavirin
165 * Fixed minor typo in GapAlignBlkDelete().
166 *
167 * Revision 6.33  2000/07/11 20:49:07  shavirin
168 * Added all major functions for Out-Of-Frame alignment.
169 *
170 * Revision 6.32  2000/06/16 20:36:55  madden
171 * Remove MemSet from setup of state structure
172 *
173 * Revision 6.31  2000/05/16 19:59:39  madden
174 * Fix for rpsblast extensions
175 *
176 * Revision 6.30  2000/05/02 17:17:13  madden
177 * Changes to prevent gapping between sequences in rps-blast
178 *
179 * Revision 6.29  2000/03/29 21:54:28  dondosha
180 * Made GapXEditScriptNew public for use in MegaBlastFillHspGapInfo
181 *
182 * Revision 6.28  2000/02/16 21:45:42  shavirin
183 * Fixed memory leaks in GXEMakeSeqAlign() function.
184 *
185 * Revision 6.27  2000/02/11 21:12:43  shavirin
186 * Fixed creating reversed seqalign (gaps).
187 *
188 * Revision 6.26  2000/02/10 16:41:02  shavirin
189 * Fixed creation of seqalign in case of reversed tblastn case.
190 *
191 * Revision 6.25  1999/12/17 20:47:04  egorov
192 * Fix 'gcc -Wall' warnings
193 *
194 * Revision 6.24  1999/11/26 22:07:48  madden
195 * Added PerformNtGappedAlignment and ALIGN_packed_nucl
196 *
197 * Revision 6.23  1999/10/29 14:50:48  shavirin
198 * Workaround to bypass DECLINE regions.
199 *
200 * Revision 6.22  1999/10/27 16:40:34  shavirin
201 * Produced more correct discontinuous SeqAlign if DECLINE regions exists.
202 *
203 * Revision 6.21  1999/10/04 18:28:45  madden
204 * Add ALIGN_EX and SEMI_ALIGN_EX that do not require reversing sequences
205 *
206 * Revision 6.20  1999/09/23 15:28:23  shavirin
207 * Temporary disabled ability to create discontinuous alignment.
208 *
209 * Revision 6.19  1999/09/21 15:52:53  shavirin
210 * Added possibility to create discontinuous alignment in case when at
211 * leaset one DECLINE_ALIGN region exists in results of BLAST search.
212 *
213 * Revision 6.18  1999/08/27 18:07:34  shavirin
214 * Passed parameter decline_align from top to the engine.
215 *
216 * Revision 6.17  1999/05/12 18:48:52  madden
217 * lift assumption that no deletion follows insertion
218 *
219 * Revision 6.16  1999/05/03 18:59:33  madden
220 * Removed set, but unused, variable count
221 *
222 * Revision 6.15  1999/03/17 18:39:11  madden
223 * Removed unneccessary memset
224 *
225 * Revision 6.14  1999/02/18 21:18:49  madden
226 * MINIINT is INT4_MIN/2
227 *
228 * Revision 6.13  1999/02/17 19:42:42  madden
229 * change INT4_MIN to -9999999 again
230 *
231 * Revision 6.11  1998/12/18 16:20:42  madden
232 * Removed unnecessary memsets
233 *
234  * Revision 6.10  1998/11/19 14:03:38  madden
235  * minor efficiency
236  *
237  * Revision 6.9  1998/11/17 13:39:03  madden
238  * Made ALIGN non-static
239  *
240  * Revision 6.8  1998/09/24 15:26:40  egorov
241  * Fix lint complaints
242  *
243  * Revision 6.7  1998/08/05 21:28:16  madden
244  * Protection against core-dump when gap_extend is zero
245  *
246  * Revision 6.6  1998/05/22 19:16:52  egorov
247  * Another try to fix time problem (increase chunk size to 2Mb)
248  *
249  * Revision 6.5  1998/05/21 12:49:26  egorov
250  * Remove memory allocation optimization to reduce time
251  *
252  * Revision 6.4  1998/04/21 18:39:25  madden
253  * Zhengs suggestion to save memory on long sequences
254  *
255  * Revision 6.3  1998/04/17 19:41:17  madden
256  * Zhengs changes for decline to align
257  *
258  * Revision 6.2  1998/01/06 18:26:48  madden
259  * Gaps have same strand as surrounding sequence
260  *
261  * Revision 6.1  1997/09/22 17:36:29  madden
262  * MACROS for position-specific matrices from Andy Neuwald
263  *
264  * Revision 6.0  1997/08/25 18:53:14  madden
265  * Revision changed to 6.0
266  *
267  * Revision 1.32  1997/06/24 21:03:38  madden
268  * TracebackToGapXEditBlock() check for last GapXEditScript fixed
269  *
270  * Revision 1.31  1997/06/24 19:12:12  tatiana
271  * TracebackToGapXEditBlock() check for last GapXEditScript added
272  *
273  * Revision 1.30  1997/06/24 18:14:33  tatiana
274  * TracebackToGapXEditBlock() fixed
275  *
276  * Revision 1.29  1997/05/13 20:45:50  madden
277  * fixed offset problem in PerformGappedAlignment
278  *
279  * Revision 1.28  1997/04/21 15:35:26  madden
280  * Fixes for 'gapped' StdSegs.
281  *
282  * Revision 1.27  1997/04/16  16:21:38  madden
283  * Set seqalign_type.
284  *
285  * Revision 1.26  1997/04/15  22:01:53  madden
286  * Added original_length[12] for translating searches.
287  *
288  * Revision 1.25  1997/04/14  14:10:37  madden
289  * Fix for the case of dropoff less than the opening penalty.
290  *
291  * Revision 1.24  1997/04/11  19:02:49  madden
292  * Changes for in-frame blastx, tblastn gapping.
293  *
294  * Revision 1.23  1997/03/14  21:01:59  madden
295  * Changed to use less memory in ALIGN, with GapXDropStateArrayStructPtr.
296  *
297  * Revision 1.22  1997/03/11  19:24:08  madden
298  * Check return value of MemNew for state_array in ALIGN.
299  *
300  * Revision 1.21  1997/03/01  18:25:33  madden
301  * Sequences reversed on SeqAlign if reverse flag set on EditBlockPtr.
302  *
303  * Revision 1.20  1997/02/24  16:40:38  madden
304  * Change to GapXEditBlockToSeqAlign to use first SeqIdPtr, duplicate.
305  *
306  * Revision 1.19  1997/02/24  15:09:38  madden
307  * Replaced MemSet by setting some elements of an array to zero,.
308  *
309  * Revision 1.18  1997/02/23  16:44:47  madden
310  * Memory, saved on GapAlignBlkPtr, reuses.
311  *
312  * Revision 1.17  1997/02/20  22:58:49  madden
313  * Support for gapped translated results.
314  *
315  * Revision 1.16  1997/02/20  21:50:24  madden
316  * Added frame and translation information to GapAlignBlk, assigned it.
317  *
318  * Revision 1.15  1997/02/12  22:19:08  madden
319  * Changes for position-based comparisons.
320  *
321  * Revision 1.14  1997/02/10  15:25:33  madden
322  * Changed args. of ALIGN and SEMI_G_ALIGN to pass in gap_align block,
323  * also whether the query was reversed and the query_offset.
324  * Also General cleanup.
325  *
326  * Revision 1.13  1997/02/04  16:22:32  madden
327  * Changes to enable gapped alignments on the reverse strand.
328  *
329  * Revision 1.12  1997/01/22  17:45:08  madden
330  * Added positionBased Boolean to ALIGN and SEMI_G_ALIGN.
331  *
332  * Revision 1.11  1997/01/21  18:55:17  madden
333  * Fixed bug with translation of EditScript to SeqAlign.
334  *
335  * Revision 1.10  1997/01/17  14:29:34  madden
336  * Protection against NULL args added in GapXEditBlockDelete.
337  *
338  * Revision 1.9  1997/01/16  20:20:49  madden
339  * TracebackToGapXEditBlock made non-static.
340  *
341  * Revision 1.8  1997/01/06  22:40:55  madden
342  * Added function SimpleIntervalToGapXEditBlock.
343  *
344  * Revision 1.7  1997/01/06  17:22:59  madden
345  * Used GapXEditScriptToSeqAlign to find SeqAlign.
346  *
347  * Revision 1.6  1996/12/30  21:45:28  madden
348  * UMR fix.
349  *
350  * Revision 1.5  1996/12/30  17:14:06  madden
351  * Fixes for changes for "require a portion of the query sequence".
352  *
353  * Revision 1.4  1996/12/30  15:44:25  madden
354  * Added capability to require a portion of the query sequence.
355  *
356  * Revision 1.3  1996/12/16  15:29:12  madden
357  * Changed gapalign.h to gapxdrop.h
358  *
359  * Revision 1.2  1996/12/12  16:45:03  madden
360  * GapAlignBlkPtr used instead of arguments in functions.
361  *
362  * Revision 1.1  1996/12/12  14:02:51  madden
363  * Initial revision
364  *
365 */
366 
367 
368 #include <gapxdrop.h>
369 #include <blast.h>
370 
371 
372 /* A PACKAGE FOR LOCALLY ALIGNING TWO SEQUENCES WITHIN A BAND:
373 
374    To invoke, call SEMI_G_ALIGN(A,B,M,N,W,G,H,S,X,&EI,&EJ, score_only, positionBased).
375    The parameters are explained as follows:
376 	A, B : two sequences to be aligned
377 	M : the length of sequence A
378 	N : the length of sequence B
379 	W : scoring table for matches and mismatches
380 	G : gap-opening penalty
381 	H : gap-extension penalty
382 	X : maximum drop off
383 	S : script for DISPLAY routine
384 	*EI : ending position of sequence A in the optimal local alignment
385 	*EJ : ending position of sequence B in the optimal local alignment
386 	score_only: indicate if ony score is needed. 1--score only 0--alignment
387 	            is also needed.
388 
389 	Use best_score to cut unfeasible points. quadratic space and row-wise
390 
391 	modification (cfj):
392 	- These routines can be told to changes their access to A and B, so that they are passed pointers to the
393 	  forward sequence, but act as if they had been past pointers to reversed sequences.
394 	  (This is done just as the accesses to posMatrix are flipped if reverse is set.)
395 	  This obviates the need to actually reverse A and B, which in some runs was almost 20% of
396 	  the total runtime.
397 	  This option is invoked by setting reverse_sequence.
398 */
399 
400 
401 /* Append "Delete k" op */
402 #define DEL_(k) \
403 data.last = (data.last < 0) ? (data.sapp[-1] -= (k)) : (*data.sapp++ = -(k));
404 
405 /* Append "Insert k" op */
406 #define INS_(k) \
407 data.last = (data.last > 0) ? (data.sapp[-1] += (k)) : (*data.sapp++ = (k));
408 
409 /* Append "Replace" op */
410 #define REP_ \
411 { data.last = *data.sapp++ = 0; }
412 
413 /* Divide by two to prevent underflows. */
414 #define MININT INT4_MIN/2
415 #define REPP_ \
416 { *data.sapp++ = MININT; data.last = 0; }
417 
418 
419 typedef struct {
420   GapXDPPtr CD;
421   Int4Ptr PNTR v;
422   Int4Ptr sapp;			/* Current script append ptr */
423   Int4  last;
424   Int4 h,  g;
425 } data_t;
426 
427 /* #define	CHUNKSIZE	1048576 */
428 #define	CHUNKSIZE	2097152
429 
430 static GapXDropStateArrayStructPtr
GapXDropGetState(GapXDropStateArrayStructPtr PNTR head,Int4 length)431 GapXDropGetState(GapXDropStateArrayStructPtr PNTR head, Int4 length)
432 
433 {
434 	GapXDropStateArrayStructPtr	retval, var, last;
435 	Int4				chunksize = MAX(CHUNKSIZE, length + length/3);
436 
437 	length += length/3;	/* Add on about 30% so the end will get reused. */
438 	retval = NULL;
439 	if (*head == NULL)
440 	{
441 		retval = (GapXDropStateArrayStructPtr) Nlm_Malloc(sizeof(GapXDropStateArrayStruct));
442 		retval->state_array = Nlm_Malloc(chunksize*sizeof(Uint1));
443 		retval->length = chunksize;
444 		retval->used = 0;
445 		retval->next = NULL;
446 		*head = retval;
447 	}
448 	else
449 	{
450 		var = *head;
451 		last = *head;
452 		while (var)
453 		{
454 			if (length < (var->length - var->used))
455 			{
456 				retval = var;
457 				break;
458 			}
459 			else if (var->used == 0)
460 			{ /* If it's empty and too small, replace. */
461 				var->state_array = MemFree(var->state_array);
462 				var->state_array = Nlm_Malloc(chunksize*sizeof(Uint1));
463 				var->length = chunksize;
464 				retval = var;
465 				break;
466 			}
467 			last = var;
468 			var = var->next;
469 		}
470 
471 		if (var == NULL)
472 		{
473 			retval = (GapXDropStateArrayStructPtr) Nlm_Malloc(sizeof(GapXDropStateArrayStruct));
474 			retval->state_array = Nlm_Malloc(chunksize*sizeof(Uint1));
475 			retval->length = chunksize;
476 			retval->used = 0;
477 			retval->next = NULL;
478 			last->next = retval;
479 		}
480 	}
481 
482 	if (retval->state_array == NULL)
483 		ErrPostEx(SEV_ERROR, 0, 0, "state array is NULL");
484 
485 	return retval;
486 
487 }
488 
489 static Boolean
GapXDropPurgeState(GapXDropStateArrayStructPtr state_struct)490 GapXDropPurgeState(GapXDropStateArrayStructPtr state_struct)
491 
492 {
493 	while (state_struct)
494 	{
495 /*
496 		MemSet(state_struct->state_array, 0, state_struct->used);
497 */
498 		state_struct->used = 0;
499 		state_struct = state_struct->next;
500 	}
501 
502 	return TRUE;
503 }
504 
505 GapXDropStateArrayStructPtr
GapXDropStateDestroy(GapXDropStateArrayStructPtr state_struct)506 GapXDropStateDestroy(GapXDropStateArrayStructPtr state_struct)
507 
508 {
509 	GapXDropStateArrayStructPtr next;
510 
511 	while (state_struct)
512 	{
513 		next = state_struct->next;
514 		MemFree(state_struct->state_array);
515 		MemFree(state_struct);
516 		state_struct = next;
517 
518 	}
519 
520 	return NULL;
521 }
522 
523 /*
524         Aligns two nucleotide sequences, one (A) should be packed in the
525         same way as the BLAST databases, the other (B) should contain one
526         basepair/byte.
527         Boolean reverse_sequence: do reverse the sequence.
528 */
529 
ALIGN_packed_nucl(Uint1Ptr B,Uint1Ptr A,Int4 N,Int4 M,Int4Ptr pej,Int4Ptr pei,GapAlignBlkPtr gap_align,Int4 query_offset,Boolean reverse_sequence)530 static Int4 ALIGN_packed_nucl(Uint1Ptr B, Uint1Ptr A, Int4 N, Int4 M,
531 		Int4Ptr pej, Int4Ptr pei, GapAlignBlkPtr gap_align,
532 		Int4 query_offset, Boolean reverse_sequence)
533 {
534   GapXDPPtr dyn_prog;
535   Int4 i, j, cb, j_r, g, decline_penalty;
536   register Int4 c, d, e, m, tt, h, X, f;
537   Int4 best_score = 0, new_score;
538   Int4Ptr *matrix;
539   register Int4Ptr wa;
540   register GapXDPPtr dp;
541   Uint1Ptr Bptr;
542   Uint1 base_pair;
543   Int4 B_increment=1;
544 
545   matrix = gap_align->matrix;
546   *pei = *pej = 0;
547   m = (g=gap_align->gap_open) + gap_align->gap_extend;
548   h = gap_align->gap_extend;
549   decline_penalty = gap_align->decline_align;
550   X = gap_align->x_parameter;
551 
552   if (X < m)
553 	X = m;
554 
555   if(N <= 0 || M <= 0) return 0;
556 
557   j = (N + 2) * sizeof(GapXDP);
558   if (gap_align->dyn_prog)
559      dyn_prog = gap_align->dyn_prog;
560   else
561      dyn_prog = (GapXDPPtr)Nlm_Malloc(j);
562   if (!dyn_prog) {
563      ErrPostEx(SEV_ERROR, 0, 0,
564                "Cannot allocate %ld bytes for dynamic programming", j);
565      return -1;
566   }
567   dyn_prog[0].CC = 0; c = dyn_prog[0].DD = -m;
568   dyn_prog[0].FF = -m;
569   for(i = 1; i <= N; i++) {
570     if(c < -X) break;
571     dyn_prog[i].CC = c;
572     dyn_prog[i].DD = c - m;
573     dyn_prog[i].FF = c - m - decline_penalty;
574     c -= h;
575   }
576 
577   if(reverse_sequence)
578     B_increment=-1;
579   else
580     B_increment=1;
581 
582   tt = 0;  j = i;
583   for (j_r = 1; j_r <= M; j_r++) {
584 	if(reverse_sequence)
585 	{
586 		base_pair = READDB_UNPACK_BASE_N(A[(M-j_r)/4], ((j_r-1)%4));
587             	wa = matrix[base_pair];
588 	}
589         else
590 	{
591 		base_pair = READDB_UNPACK_BASE_N(A[1+((j_r-1)/4)], (3-((j_r-1)%4)));
592             	wa = matrix[base_pair];
593 	}
594       e = c =f = MININT;
595       Bptr = &B[tt];
596       if(reverse_sequence)
597 	Bptr = &B[N-tt];
598 /*
599       Bptr += B_increment;
600 */
601       for (cb = i = tt, dp = &dyn_prog[i]; i < j; i++) {
602 	 Bptr += B_increment;
603          new_score = wa[*Bptr];
604 
605 	  d = dp->DD;
606 	  if (e < f) e = f;
607 	  if (d < f) d = f;
608 	  if (c < d || c < e) {
609 	      if (d < e) {
610 		  c = e;
611 	      } else {
612 		  c = d;
613 	      }
614 	      if (best_score - c > X) {
615 		  c = dp->CC+new_score; f = dp->FF;
616 		  if (tt == i) tt++;
617 		  else { dp->CC =dp->FF= MININT;}
618 	      } else {
619 		  cb = i;
620                   if ((c-=m) > (d-=h)) {
621                       dp->DD = c;
622                   } else {
623                       dp->DD = d;
624                   }
625                   if (c > (e-=h)) {
626                       e = c;
627                   }
628                   c+=m;
629 		  d = dp->CC+new_score; dp->CC = c; c=d;
630 		  d = dp->FF; dp->FF = f-decline_penalty; f = d;
631 	      }
632 	  } else {
633 	      if (best_score - c > X){
634 		  c = dp->CC+new_score; f= dp->FF;
635 		  if (tt == i) tt++;
636 		  else { dp->CC =dp->FF= MININT;}
637 	      } else {
638 		  cb = i;
639 		  if (c > best_score) {
640 		      best_score = c;
641 		      *pei = j_r; *pej = i;
642 		  }
643 		  if ((c-=m) > (d-=h)) {
644 		      dp->DD = c;
645 		  } else {
646 		      dp->DD = d;
647 		  }
648 		  if (c > (e-=h)) {
649 		      e = c;
650 		  }
651 		  c+=m;
652 		  d = dp->FF;
653 		  if (c-g>f) dp->FF = c-g-decline_penalty; else dp->FF = f-decline_penalty;
654 		  f = d;
655 		  d = dp->CC+new_score; dp->CC = c; c = d;
656 	      }
657 	  }
658           if (new_score == MININT)
659              break;
660 	  dp++;
661       }
662       if (tt == j) break;
663       if (cb < j-1) { j = cb+1;}
664       else while (e >= best_score-X && j <= N) {
665 	  dyn_prog[j].CC = e; dyn_prog[j].DD = e-m;dyn_prog[j].FF = e-g-decline_penalty;
666 	  e -= h; j++;
667       }
668       if (j <= N) {
669 	  dyn_prog[j].DD = dyn_prog[j].CC = dyn_prog[j].FF = MININT; j++;
670       }
671   }
672 
673   if (!gap_align->dyn_prog)
674      MemFree(dyn_prog);
675 
676   return best_score;
677 }
678 /*
679 	Boolean revered: has the sequence been reversed? Used for psi-blast
680 	Boolean reverse_sequence: do reverse the sequence.
681 	Two Booleans are used to emulate behavior before the sequence could be reversed.
682 */
683 
684 
685 static Int4
ALIGN_EX(Uint1Ptr A,Uint1Ptr B,Int4 M,Int4 N,Int4Ptr S,Int4Ptr pei,Int4Ptr pej,Int4Ptr PNTR sapp,GapAlignBlkPtr gap_align,Int4 query_offset,Boolean reversed,Boolean reverse_sequence)686 ALIGN_EX(Uint1Ptr A, Uint1Ptr B, Int4 M, Int4 N, Int4Ptr S, Int4Ptr pei,
687 	Int4Ptr pej, Int4Ptr PNTR sapp, GapAlignBlkPtr gap_align,
688 	Int4 query_offset, Boolean reversed, Boolean reverse_sequence)
689 
690 {
691   data_t data;
692   Int4 i, j, cb,  j_r, s, k;
693   Uint1 st, std, ste;
694   Int4 gap_open, gap_extend, decline_penalty;
695   register Int4 c, d, e, m,t, tt, f, tt_start;
696   Int4 best_score = 0;
697   register Int4Ptr wa;
698   register GapXDPPtr dp, dyn_prog;
699   Uint1Ptr PNTR state, stp, tmp;
700   Uint1Ptr state_array;
701   Int4Ptr *matrix;
702   register Int4 X;
703   GapXDropStateArrayStructPtr state_struct;
704   Int4 next_c;
705   Uint1Ptr Bptr;
706   Int4 B_increment=1;
707   Int4 align_len;
708 
709   matrix = gap_align->matrix;
710   *pei = *pej = 0;
711   data.sapp = *sapp = S;
712   data.last= 0;
713   m = gap_align->gap_open + gap_align->gap_extend;
714   decline_penalty = gap_align->decline_align;
715 
716   gap_open = gap_align->gap_open;
717   gap_extend = gap_align->gap_extend;
718   X = gap_align->x_parameter;
719 
720   if (X < m)
721 	X = m;
722 
723   if(N <= 0 || M <= 0) {
724     *pei = *pej;
725     return 0;
726   }
727 
728   GapXDropPurgeState(gap_align->state_struct);
729 
730   j = (N + 2) * sizeof(GapXDP);
731   if (gap_align->dyn_prog)
732      dyn_prog = gap_align->dyn_prog;
733   else
734      dyn_prog = (GapXDPPtr)Nlm_Malloc(j);
735 
736   state = Nlm_Malloc(sizeof(Uint1Ptr)*(M+1));
737   dyn_prog[0].CC = 0;
738   dyn_prog[0].FF = -m - decline_penalty;
739   c = dyn_prog[0].DD = -m;
740 
741   /* Protection against divide by zero. */
742   if (gap_extend > 0)
743   	state_struct = GapXDropGetState(&gap_align->state_struct, X/gap_extend+5);
744   else
745 	state_struct = GapXDropGetState(&gap_align->state_struct, N+3);
746 
747   state_array = state_struct->state_array;
748   state[0] = state_array;
749   stp  = state[0];
750   for(i = 1; i <= N; i++) {
751     if(c < -X) break;
752     dyn_prog[i].CC = c;
753     dyn_prog[i].DD = c - m;
754     dyn_prog[i].FF = c - m - decline_penalty;
755     c -= gap_extend;
756     stp[i] = 1;
757   }
758   state_struct->used = i+1;
759 
760   if(reverse_sequence)
761     B_increment=-1;
762   else
763     B_increment=1;
764 
765   tt = 0;  j = i;
766   for(j_r = 1; j_r <= M; j_r++) {
767      /* Protection against divide by zero. */
768     if (gap_extend > 0)
769     	state_struct = GapXDropGetState(&gap_align->state_struct, j-tt+5+X/gap_extend);
770     else
771 	state_struct = GapXDropGetState(&gap_align->state_struct, N-tt+3);
772     state_array = state_struct->state_array + state_struct->used + 1;
773     state[j_r] = state_array - tt + 1;
774     stp = state[j_r];
775     tt_start = tt;
776     if (!(gap_align->positionBased)){ /*AAS*/
777       if(reverse_sequence)
778         wa = matrix[A[M-j_r]];
779       else
780         wa = matrix[A[j_r]];
781       }
782     else {
783       if(reversed || reverse_sequence)
784         wa = gap_align->posMatrix[M - j_r];
785       else
786         wa = gap_align->posMatrix[j_r + query_offset];
787     }
788     e = c = f= MININT;
789     Bptr = &B[tt];
790     if(reverse_sequence)
791       Bptr = &B[N-tt];
792 
793     for (cb = i = tt, dp = &dyn_prog[i]; i < j; i++) {
794         Int4 next_f;
795         d = (dp)->DD;
796         Bptr += B_increment;
797         next_c = dp->CC+wa[*Bptr];   /* Bptr is & B[i+1]; */
798         next_f = dp->FF;
799 	st = 0;
800 	if (c < f) {c = f; st = 3;}
801 	if (f > d) {d = f; std = 60;}
802 	else {
803 	  std = 30;
804 	  if (c < d) { c= d;st = 2;}
805 	}
806 	if (f > e) {e = f; ste = 20;}
807 	else {
808 	  ste = 10;
809 	  if (c < e) {c=e; st=1;}
810 	}
811 	if (best_score - c > X){
812 	  if (tt == i) tt++;
813 	  else { dp->CC = MININT; }
814 	} else {
815 	  cb = i;
816 	  if (c > best_score) {
817 	    best_score = c;
818 	    *pei = j_r; *pej = i;
819 	  }
820 	  if ((c-=m) > (d-=gap_extend)) {
821 	    dp->DD = c;
822 	  } else {
823 	    dp->DD = d;
824 	    st+=std;
825 	  }
826 	  if (c > (e-=gap_extend)) {
827 	    e = c;
828 	  }  else {
829 	    st+=ste;
830 	  }
831 	  c+=m;
832 	  if (f < c-gap_open) {
833 	    dp->FF = c-gap_open-decline_penalty;
834 	  } else {
835 	    dp->FF = f-decline_penalty; st+= 5;
836 	  }
837 	  dp->CC = c;
838 	}
839 	stp[i] = st;
840 	c = next_c;
841 	f = next_f;
842 	dp++;
843     }
844     if (tt == j) { j_r++; break;}
845     if (cb < j-1) { j = cb+1;}
846     else {
847 	while (e >= best_score-X && j <= N) {
848 	    dyn_prog[j].CC = e; dyn_prog[j].DD = e-m; dyn_prog[j].FF = e-gap_open-decline_penalty;
849 	    e -= gap_extend; stp[j] = 1;
850 	    j++;
851 	}
852     }
853     if (j <= N) {
854 	dyn_prog[j].DD = dyn_prog[j].CC= dyn_prog[j].FF = MININT;
855 	j++;
856     }
857     state_struct->used += (MAX(i, j) - tt_start + 1);
858   }
859   i = *pei; j = *pej;
860   tmp = Nlm_Malloc(i+j);
861   for (s=0, c = 0; i> 0 || j > 0; c++) {
862       t = state[i][j];
863       k  = t %5;
864       if (s == 1)
865 	  if ((t/10)%3 == 1) k = 1;
866 	  else if ((t/10)%3 == 2) k = 3;
867       if (s == 2)
868 	  if ((t/30) == 1) k = 2;
869 	  else if ((t/30) == 2) k = 3;
870       if (s == 3 && ((t/5)%2) == 1) k = 3;
871       if (k == 1) { j--;}
872       else if (k == 2) {i--;}
873       else {j--; i--;}
874       tmp[c] = s = k;
875   }
876 
877   align_len = c;
878   c--;
879   while (c >= 0) {
880       if (tmp[c] == 0) REP_
881       else if (tmp[c] == 1) INS_(1)
882       else if (tmp[c] == 3) REPP_
883       else DEL_(1)
884       c--;
885   }
886 
887   MemFree(tmp);
888 
889   MemFree(state);
890 
891   if (!gap_align->dyn_prog)
892      MemFree(dyn_prog);
893 
894   if ((align_len -= data.sapp - S) > 0)
895      MemSet(data.sapp, 0, align_len);
896   *sapp = data.sapp;
897 
898   return best_score;
899 }
900 
901 Int4 LIBCALL
ALIGN(Uint1Ptr A,Uint1Ptr B,Int4 M,Int4 N,Int4Ptr S,Int4Ptr pei,Int4Ptr pej,Int4Ptr PNTR sapp,GapAlignBlkPtr gap_align,Int4 query_offset,Boolean reversed)902 ALIGN(Uint1Ptr A, Uint1Ptr B, Int4 M, Int4 N,
903 		Int4Ptr S, Int4Ptr pei, Int4Ptr pej, Int4Ptr PNTR sapp,
904 		GapAlignBlkPtr gap_align, Int4 query_offset, Boolean reversed)
905 
906 {
907 	return ALIGN_EX(A, B, M, N, S, pei, pej, sapp, gap_align, query_offset, reversed, FALSE);
908 }
909 
910 /*
911 	Boolean revered: has the sequence been reversed? Used for psi-blast
912 	Boolean reverse_sequence: do reverse the sequence.
913 	Two Booleans are used to emulate behavior before the sequence could be reversed.
914 */
SEMI_G_ALIGN_EX(Uint1Ptr A,Uint1Ptr B,Int4 M,Int4 N,Int4Ptr S,Int4Ptr pei,Int4Ptr pej,Boolean score_only,Int4Ptr PNTR sapp,GapAlignBlkPtr gap_align,Int4 query_offset,Boolean reversed,Boolean reverse_sequence)915 static Int4 SEMI_G_ALIGN_EX(Uint1Ptr A, Uint1Ptr B, Int4 M, Int4 N,
916 		Int4Ptr S, Int4Ptr pei, Int4Ptr pej,
917 		Boolean score_only, Int4Ptr PNTR sapp, GapAlignBlkPtr gap_align,
918 		Int4 query_offset, Boolean reversed, Boolean reverse_sequence)
919 
920 {
921   GapXDPPtr dyn_prog;
922   Int4 i, j, cb, j_r, g, decline_penalty;
923   register Int4 c, d, e, m, tt, h, X, f;
924   Int4 best_score = 0;
925   Int4Ptr *matrix;
926   register Int4Ptr wa;
927   register GapXDPPtr dp;
928   Uint1Ptr Bptr;
929   Int4 B_increment=1;
930   Int4 next_c, next_f;
931 
932   if(!score_only) {
933     return ALIGN_EX(A, B, M, N, S, pei, pej, sapp, gap_align, query_offset, reversed, reverse_sequence);
934   }
935 
936   matrix = gap_align->matrix;
937   *pei = *pej = 0;
938   m = (g=gap_align->gap_open) + gap_align->gap_extend;
939   h = gap_align->gap_extend;
940   decline_penalty = gap_align->decline_align;
941 
942   X = gap_align->x_parameter;
943 
944   if (X < m)
945 	X = m;
946 
947   if(N <= 0 || M <= 0) return 0;
948 
949   j = (N + 2) * sizeof(GapXDP);
950   dyn_prog = (GapXDPPtr)Nlm_Malloc(j);
951 
952   dyn_prog[0].CC = 0; c = dyn_prog[0].DD = -m;
953   dyn_prog[0].FF = -m - decline_penalty;
954   for(i = 1; i <= N; i++) {
955     if(c < -X) break;
956     dyn_prog[i].CC = c;
957     dyn_prog[i].DD = c - m;
958     dyn_prog[i].FF = c-m - decline_penalty;
959     c -= h;
960   }
961 
962   if(reverse_sequence)
963     B_increment=-1;
964   else
965     B_increment=1;
966 
967   tt = 0;  j = i;
968   for (j_r = 1; j_r <= M; j_r++) {
969       if (!(gap_align->positionBased)){ /*AAS*/
970         if(reverse_sequence)
971             wa = matrix[A[M-j_r]];
972         else
973             wa = matrix[A[j_r]];
974       }
975       else {
976 	  if(reversed || reverse_sequence)
977 	  {
978 	      wa = gap_align->posMatrix[M - j_r];
979 		if (A[M-j_r] == NULLB)  /* Prevents gapping through a NULL byte in rps-blast. */
980 			break;
981 	  }
982 	  else
983 	  {
984 	      wa = gap_align->posMatrix[j_r + query_offset];
985 		if (A[j_r] == NULLB)
986 			break;
987 	  }
988       }
989       e = c =f = MININT;
990       Bptr = &B[tt];
991       if(reverse_sequence)
992          Bptr = &B[N-tt];
993       for (cb = i = tt, dp = &dyn_prog[i]; i < j; i++) {
994          d = dp->DD;
995 	 Bptr += B_increment;
996          next_c = dp->CC+wa[*Bptr];   /* Bptr is & B[i+1]; */
997          next_f = dp->FF;
998 
999          if (c < f) c = f;
1000          if (f > d) d = f;
1001          else if (c < d) c= d;
1002 
1003          if (f > e) e = f;
1004          else if (c < e) c = e;
1005 
1006          if (best_score - c > X){
1007             if (tt == i) tt++;
1008             else { dp->CC = MININT; }
1009          } else {
1010             cb = i;
1011             if (c > best_score) {
1012                best_score = c;
1013                *pei = j_r; *pej = i;
1014             }
1015             if ((c-=m) > (d-=h)) {
1016                dp->DD = c;
1017             } else {
1018                dp->DD = d;
1019             }
1020             if (c > (e-=h)) {
1021                e = c;
1022             }
1023             c+=m;
1024             if (f < c-g) {
1025                dp->FF = c-g-decline_penalty;
1026             } else {
1027                dp->FF = f-decline_penalty;
1028             }
1029             dp->CC = c;
1030          }
1031          c = next_c;
1032          f = next_f;
1033          dp++;
1034       }
1035 
1036       if (tt == j) break;
1037       if (cb < j-1) { j = cb+1;}
1038       else while (e >= best_score-X && j <= N) {
1039 	  dyn_prog[j].CC = e; dyn_prog[j].DD = e-m;dyn_prog[j].FF = e-g-decline_penalty;
1040 	  e -= h; j++;
1041       }
1042       if (j <= N) {
1043 	  dyn_prog[j].DD = dyn_prog[j].CC = dyn_prog[j].FF = MININT; j++;
1044       }
1045   }
1046 
1047   MemFree(dyn_prog);
1048 
1049   return best_score;
1050 }
1051 
SEMI_G_ALIGN(Uint1Ptr A,Uint1Ptr B,Int4 M,Int4 N,Int4Ptr S,Int4Ptr pei,Int4Ptr pej,Boolean score_only,Int4Ptr PNTR sapp,GapAlignBlkPtr gap_align,Int4 query_offset,Boolean reversed)1052 Int4 SEMI_G_ALIGN(Uint1Ptr A, Uint1Ptr B, Int4 M, Int4 N,
1053 		Int4Ptr S, Int4Ptr pei, Int4Ptr pej,
1054 		Boolean score_only, Int4Ptr PNTR sapp, GapAlignBlkPtr gap_align,
1055 		Int4 query_offset, Boolean reversed)
1056 
1057 {
1058 	return SEMI_G_ALIGN_EX(A, B, M, N, S, pei, pej, score_only, sapp, gap_align,
1059 		query_offset, reversed, FALSE);
1060 }
1061 
OOF_ALIGN(Uint1Ptr A,Uint1Ptr B,Int4 M,Int4 N,Int4Ptr S,Int4Ptr pei,Int4Ptr pej,Int4Ptr PNTR sapp,GapAlignBlkPtr gap_align,Int4 query_offset,Boolean reversed)1062 static Int4 OOF_ALIGN(Uint1Ptr A, Uint1Ptr B, Int4 M, Int4 N,
1063                       Int4Ptr S, Int4Ptr pei, Int4Ptr pej, Int4Ptr PNTR sapp,
1064                       GapAlignBlkPtr gap_align, Int4 query_offset,
1065                       Boolean reversed)
1066 
1067 {
1068   data_t data;
1069   Int4 i, j, cb,  j_r, s, k, sc, s1, s2, s3, st1, e1, e2, e3, shift;
1070   register Int4 c, d, m,t, tt, tt_start, f1, f2;
1071   Int4 best_score = 0;
1072   register Int4Ptr wa;
1073   Int4 count = 0;
1074   register GapXDPPtr dp;
1075   Uint1Ptr PNTR state, stp, tmp;
1076   Uint1Ptr state_array;
1077   Int4Ptr *matrix;
1078   register Int4 X;
1079   GapXDropStateArrayStructPtr state_struct;
1080 
1081   matrix = gap_align->matrix;
1082   *pei =0; *pej = -2;
1083   data.sapp = *sapp = S;
1084   data.last= 0;
1085   m = gap_align->gap_open + gap_align->gap_extend;
1086   data.g = gap_align->gap_open;
1087   data.h = gap_align->gap_extend;
1088   data.v = matrix;
1089   X = gap_align->x_parameter;
1090   shift = gap_align->shift_pen;
1091 
1092   if (X < m)
1093 	X = m;
1094 
1095   if(N <= 0 || M <= 0) {
1096     return 0;
1097   }
1098 
1099   N+=2;
1100   GapXDropPurgeState(gap_align->state_struct);
1101 
1102   j = (N + 2) * sizeof(GapXDP);
1103   data.CD = (GapXDPPtr)MemNew(j);
1104 
1105   state = MemNew(sizeof(Uint1Ptr)*(M+1));
1106   data.CD[0].CC = 0;
1107   c = data.CD[0].DD = -m;
1108   state_struct = GapXDropGetState(&gap_align->state_struct, N+3);
1109   state_array = state_struct->state_array;
1110   state[0] = state_array;
1111   stp  = state[0];
1112   data.CD[0].CC = 0; c = data.CD[0].DD = -m;
1113   for(i = 3; i <= N; i+=3) {
1114     data.CD[i].CC = c;
1115     data.CD[i].DD = c - m;
1116     data.CD[i-1].CC = data.CD[i-2].CC = data.CD[i-1].DD =
1117 	data.CD[i-2].DD = MININT;
1118     if(c < -X) break;
1119     c -= data.h;
1120     stp[i] = stp[i-1] = stp[i-2] = 6;
1121   }
1122   i -= 2;
1123   data.CD[i].CC = data.CD[i].DD = MININT;
1124   tt = 0;  j = i;
1125   state_struct->used = i+1;
1126   B-=2;
1127   tt = 0;  j = i;
1128   for(j_r = 1; j_r <= M; j_r++) {
1129     count += j - tt;
1130     state_struct = GapXDropGetState(&gap_align->state_struct, N-tt+3);
1131     state_array = state_struct->state_array + state_struct->used + 1;
1132     state[j_r] = state_array - tt + 1;
1133     stp = state[j_r];
1134     tt_start = tt;
1135     if (!(gap_align->positionBased)) /*AAS*/
1136       wa = matrix[A[j_r]];
1137     else {
1138       if(reversed)
1139         wa = gap_align->posMatrix[M - j_r];
1140       else
1141         wa = gap_align->posMatrix[j_r + query_offset];
1142     }
1143     c = MININT; sc =f1=f2=e1 =e2=e3=s1=s2=s3=MININT;
1144     for(cb = i = tt, dp = &data.CD[i-1]; 1;) {
1145 	if (i >= j) break;
1146 	sc = MAX(MAX(f1, f2)-shift, s3);
1147 	if (sc == s3) st1=3;
1148 	else if (sc+shift == f1) {
1149 	  if (f1 == s2) st1=2; else st1 = 5;
1150 	} else if (f2 == s1) st1 = 1; else st1 = 4;
1151 	sc += wa[B[i++]];
1152 	f1 = s3;
1153 	s3 = (++dp)->CC; f1 = MAX(f1, s3);
1154 	d = dp->DD;
1155 	if (sc < MAX(d, e1)) {
1156 	    if (d > e1) { sc = d; st1 = 30;}
1157 	    else {sc = e1; st1 = 36;}
1158 	    if (best_score -sc > X) {
1159 		if (tt == i-1) tt = i;
1160 		else dp->CC = MININT;
1161 	    } else {
1162 		cb = i;
1163 		dp->CC = sc;
1164 		dp->DD = d-data.h;
1165 		e1-=data.h;
1166 	    }
1167 	} else {
1168 	    if (best_score -sc > X) {
1169 		if (tt == i-1) tt = i;
1170 		else dp->CC = MININT;
1171 	    } else {
1172 		cb = i;
1173 		dp->CC = sc;
1174 		if (sc > best_score) {best_score = sc; *pei = j_r;*pej=i-3;}
1175 		if ((sc-=m) > (e1-=data.h)) e1 = sc; else st1+=10;
1176 		if (sc < (d-=data.h)) { dp->DD = d; st1 += 20;}
1177 		else dp->DD = sc;
1178 	    }
1179 	}
1180 	stp[i-1] = st1;
1181 	if (i >= j) {c = e1; e1 = e2; e2 = e3; e3 = c; break;}
1182 	sc = MAX(MAX(f1,f2)-shift, s2);
1183 	if (sc == s2) st1=3;
1184 	else if (sc+shift == f1) {
1185 	  if (f1 == s3) st1=1; else st1 = 4;
1186 	} else if (f2 == s1) st1 = 2; else st1 = 5;
1187 	sc += wa[B[i++]];
1188 	f2 = s2; s2 = (++dp)->CC; f2 = MAX(f2, s2);
1189 	d = dp->DD;
1190 	if (sc < MAX(d, e2)) {
1191 	    if ((sc=MAX(d,e2)) < best_score-X) {
1192 		if (tt == i-1) tt = i;
1193 		else dp->CC = MININT;
1194 	    } else {
1195 		if (sc == d)  st1= 30; else st1=36;
1196 		cb = i;
1197 		dp->CC = sc;
1198 		dp->DD = d-data.h;
1199 		e2-=data.h;
1200 	    }
1201 	} else {
1202 	    if (sc < best_score-X) {
1203 		if (tt == i-1) tt = i;
1204 		else dp->CC = MININT;
1205 	    } else {
1206 		cb = i;
1207 		dp->CC = sc;
1208 		if (sc > best_score) {best_score = sc;*pei= j_r; *pej=i-3;}
1209 		if ((sc-=m) > (e2-=data.h)) e2 = sc; else st1+=10;
1210 		if (sc < (d-=data.h)) {dp->DD = d; st1+=20;}
1211 		else  dp->DD = sc;
1212 	    }
1213 	}
1214 	stp[i-1] = st1;
1215 	if (i >= j) { c = e2; e2 = e1; e1 = e3; e3 = c; break; }
1216 	sc = MAX(MAX(f1, f2)-shift, s1);
1217 	if (sc == s1) st1=3;
1218 	else if (sc+shift == f1) {
1219 	  if (f1 == s3) st1=2; else st1 = 5;
1220 	} else if (f2 == s2) st1 = 1; else st1 = 4;
1221 	sc += wa[B[i++]];
1222 	f1 = f2;
1223 	f2 = s1; s1 = (++dp)->CC; f2 = MAX(f2, s1);
1224 	d = dp->DD;
1225 	if (sc < MAX(d, e3)) {
1226 	    sc = MAX(d, e3);
1227 	    if (sc < best_score-X) {
1228 		if (tt == i-1) tt = i;
1229 		else dp->CC = MININT;
1230 	    } else {
1231 		if (sc == d) st1 = 30; else st1 = 36;
1232 		cb = i;
1233 		dp->CC = sc;
1234 		dp->DD = d-data.h;
1235 		e3-=data.h;
1236 	    }
1237 	} else {
1238 	    if (sc < best_score-X) {
1239 		if (tt == i-1) tt = i;
1240 		else dp->CC = MININT;
1241 	    } else {
1242 		cb = i;
1243 		dp->CC = sc;
1244 		if (sc > best_score) {best_score = sc;*pei = j_r; *pej=i-3;}
1245 		if ((sc-=m) > (e3-=data.h)) e3 = sc; else st1 += 10;
1246 		if (sc < (d-=data.h)) {dp->DD = d; st1 += 20;}
1247 		else dp->DD = sc;
1248 	    }
1249 	}
1250 	stp[i-1] = st1;
1251 	sc = c;
1252     }
1253     if(tt == j) break;
1254     if(cb < j) { j = cb;}
1255     else {
1256 	c = (MAX(e1, MAX(e2, e3))+X-best_score)/data.h+j;
1257 	if (c > N) c = N;
1258 	if (c > j)
1259 	while (1) {
1260 	    data.CD[j].CC = e1;
1261 	    stp[j] = 36;
1262 	    data.CD[j++].DD = e1 - m; e1 -=data.h;
1263 	    if (j > c) break;
1264 	    data.CD[j].CC = e2; stp[j] = 36;
1265 	    data.CD[j++].DD = e2- m; e2-=data.h;
1266 	    if (j > c) break;
1267 	    data.CD[j].CC = e3; stp[j] = 36;
1268 	    data.CD[j++].DD = e3- m; e3-=data.h;
1269 	    if (j > c) break;
1270 	}
1271     }
1272     c = j+4;
1273     if (c > N+1) c = N+1;
1274     while (j < c) {
1275 	data.CD[j].DD = data.CD[j].CC = MININT;
1276 	j++;
1277     }
1278 
1279     state_struct->used += (MAX(i, j) - tt_start + 1);
1280   }
1281   i = *pei; j = *pej+2;
1282   /* printf("best = %d i,j=%d %d\n", best_score, i, j); */
1283   tmp = MemNew(i + j);
1284   for (s= 1, c= 0; i > 0 || j > 0; c++, i--) {
1285       k  = (t=state[i][j])%10;
1286       if (s == 6 && (t/10)%2 == 1) k = 6;
1287       if (s == 0 && (t/20)== 1) k = 0;
1288       if (k == 6) { j -= 3; i++;}
1289       else {j -= k;}
1290       s = tmp[c] = k;
1291   }
1292   c--;
1293   while(c >= 0) {
1294       *data.sapp++ = tmp[c--];
1295   }
1296 
1297   MemFree(tmp);
1298 
1299   MemFree(state);
1300 
1301   MemFree(data.CD);
1302   *sapp = data.sapp;
1303 
1304   return best_score;
1305 }
1306 
1307 
OOF_SEMI_G_ALIGN(Uint1Ptr A,Uint1Ptr B,Int4 M,Int4 N,Int4Ptr S,Int4Ptr pei,Int4Ptr pej,Boolean score_only,Int4Ptr PNTR sapp,GapAlignBlkPtr gap_align,Int4 query_offset,Boolean reversed)1308 static Int4 OOF_SEMI_G_ALIGN(Uint1Ptr A, Uint1Ptr B, Int4 M, Int4 N,
1309                              Int4Ptr S, Int4Ptr pei, Int4Ptr pej,
1310                              Boolean score_only, Int4Ptr PNTR sapp,
1311                              GapAlignBlkPtr gap_align,
1312                              Int4 query_offset, Boolean reversed)
1313 {
1314   GapXDPPtr CD;
1315   Int4 i, j, cb, j_r;
1316   Int4 e1, e2, e3, s1, s2, s3, shift;
1317   register Int4 c, d, sc, m, tt, h, X, f1, f2;
1318   Int4 best_score = 0, count = 0;
1319   Int4Ptr *matrix;
1320   register Int4Ptr wa;
1321   register GapXDPPtr dp;
1322 
1323   if(!score_only)
1324       return OOF_ALIGN(A, B, M, N, S, pei, pej, sapp, gap_align, query_offset, reversed);
1325 
1326   /*printf("reversed=%d\n", reversed);*/
1327   matrix = gap_align->matrix;
1328   *pei = 0; *pej = -2;
1329   m = gap_align->gap_open + gap_align->gap_extend;
1330   h = gap_align->gap_extend;
1331   X = gap_align->x_parameter;
1332   shift = gap_align->shift_pen;
1333   /*printf("m=%d h=%d X=%d shift=%d %d\n", m,h, X, shift, B[2]);*/
1334 
1335   B-=2;
1336   if (X < m)
1337 	X = m;
1338 
1339   if(N <= 0 || M <= 0) return 0;
1340   N+=2;
1341 
1342   j = (N + 5) * sizeof(GapXDP);
1343   CD = (GapXDPPtr)MemNew(j);
1344   CD[0].CC = 0; c = CD[0].DD = -m;
1345   for(i = 3; i <= N; i+=3) {
1346     CD[i].CC = c;
1347     CD[i].DD = c - m;
1348     CD[i-1].CC = CD[i-2].CC = CD[i-1].DD = CD[i-2].DD = MININT;
1349     if(c < -X) break;
1350     c -= h;
1351   }
1352   i -= 2;
1353   CD[i].CC = CD[i].DD = MININT;
1354   tt = 0;  j = i;
1355   for (j_r = 1; j_r <= M; j_r++) {
1356     count += j - tt; CD[2].CC = CD[2].DD= MININT;
1357     if (!(gap_align->positionBased)) /*AAS*/
1358       wa = matrix[A[j_r]];
1359     else {
1360       if(reversed)
1361         wa = gap_align->posMatrix[M - j_r];
1362       else
1363         wa = gap_align->posMatrix[j_r + query_offset];
1364     }
1365     s1 = s2 = s3 = f1= f2 = MININT; f1=f2=e1 = e2 = e3 = MININT; sc = MININT;
1366     for(cb = i = tt, dp = &CD[i-1]; 1;) {
1367 	if (i >= j) break;
1368 	sc = MAX(MAX(f1, f2)-shift, s3)+wa[B[i++]];
1369 	f1 = s3;
1370 	s3 = (++dp)->CC; f1 = MAX(f1, s3);
1371 	d = dp->DD;
1372 	if (sc < MAX(d, e1)) {
1373 	    sc = MAX(d, e1);
1374 	    if (best_score -sc > X) {
1375 		if (tt == i-1) tt = i;
1376 		else dp->CC = MININT;
1377 	    } else {
1378 		cb = i;
1379 		dp->CC = sc;
1380 		dp->DD = d-h;
1381 		e1-=h;
1382 	    }
1383 	} else {
1384 	    if (best_score -sc > X) {
1385 		if (tt == i-1) tt = i;
1386 		else dp->CC = MININT;
1387 	    } else {
1388 		cb = i;
1389 		dp->CC = sc;
1390 		if (sc > best_score) {best_score = sc; *pei = j_r;*pej=i-1;}
1391 		if ((sc-=m) > (e1-=h)) e1 = sc;
1392 		dp->DD = MAX(sc, d-h);
1393 	    }
1394 	}
1395 	if (i >= j) {c = e1; e1 = e2; e2 = e3; e3 = c; break;}
1396 	sc = MAX(MAX(f1, f2)-shift, s2)+wa[B[i++]];
1397 	f2 = s2; s2 = (++dp)->CC; f2 = MAX(f2, s2);
1398 	d = dp->DD;
1399 	if (sc < MAX(d, e2)) {
1400 	    if ((c=MAX(d,e2)) < best_score-X) {
1401 		if (tt == i-1) tt = i;
1402 		else dp->CC = MININT;
1403 	    } else {
1404 		cb = i;
1405 		dp->CC = c;
1406 		dp->DD = d-h;
1407 		e2-=h;
1408 	    }
1409 	} else {
1410 	    if (sc < best_score-X) {
1411 		if (tt == i-1) tt = i;
1412 		else dp->CC = MININT;
1413 	    } else {
1414 		cb = i;
1415 		dp->CC = sc;
1416 		if (sc > best_score) {best_score = sc;*pei= j_r; *pej=i-1;}
1417 		if ((sc-=m) > (e2-=h)) e2 = sc;
1418 		dp->DD = MAX(sc, d-h);
1419 	    }
1420 	}
1421 	if (i >= j) { c = e2; e2 = e1; e1 = e3; e3 = c; break; }
1422 	sc = MAX(MAX(f1, f2)-shift, s1)+wa[B[++i]];
1423 	f1 = f2;
1424 	f2 = s1; s1 = (++dp)->CC; f2 = MAX(f2, s1);
1425 	d = dp->DD;
1426 	if (sc < MAX(d, e3)) {
1427 	    sc = MAX(d, e3);
1428 	    if (sc < best_score-X) {
1429 		if (tt == i-1) tt = i;
1430 		else dp->CC = MININT;
1431 	    } else {
1432 		cb = i;
1433 		dp->CC = sc;
1434 		dp->DD = d-h;
1435 		e3-=h;
1436 	    }
1437 	} else {
1438 	    if (sc < best_score-X) {
1439 		if (tt == i-1) tt = i;
1440 		else dp->CC = MININT;
1441 	    } else {
1442 		cb = i;
1443 		dp->CC = sc;
1444 		if (sc > best_score) {best_score = sc;*pei = j_r; *pej=i-1;}
1445 		if ((sc-=m) > (e3-=h)) e3 = sc;
1446 		dp->DD = MAX(sc, d-h);
1447 	    }
1448 	}
1449 	sc = c;
1450     }
1451     if(tt == j) break;
1452     if(cb < j) { j = cb;}
1453     else {
1454 	c = (MAX(e1, MAX(e2, e3))+X-best_score)/h+j;
1455 	if (c > N) c = N;
1456 	if (c > j)
1457 	while (1) {
1458 	    CD[j].CC = e1;
1459 	    CD[j++].DD = e1 - m; e1 -=h;
1460 	    if (j > c) break;
1461 	    CD[j].CC = e2;
1462 	    CD[j++].DD = e2- m; e2-=h;
1463 	    if (j > c) break;
1464 	    CD[j].CC = e3;
1465 	    CD[j++].DD = e3- m; e3-=h;
1466 	    if (j > c) break;
1467 	}
1468     }
1469     c = j+4;
1470     if (c > N+1) c = N+1;
1471     while (j < c) {
1472 	CD[j].DD = CD[j].CC = MININT;
1473 	j++;
1474     }
1475   }
1476   /*
1477   printf("j_r=%d M=%d pei pej=%d %d\n", j_r, M, *pei, *pej);
1478   */
1479   /*printf("bestscore = %d\n", best_score);*/
1480 
1481   MemFree(CD);
1482 
1483   return best_score;
1484 }
1485 /*
1486 	Allocates an EditScriptPtr and puts it on the end of
1487 	the chain of EditScriptPtr's.  Returns a pointer to the
1488 	new one.
1489 
1490 */
1491 GapXEditScriptPtr
GapXEditScriptNew(GapXEditScriptPtr old)1492 GapXEditScriptNew(GapXEditScriptPtr old)
1493 
1494 {
1495 	GapXEditScriptPtr new;
1496 
1497 	new = (GapXEditScriptPtr) MemNew(sizeof(GapXEditScript));
1498 
1499 	if (old == NULL)
1500 		return new;
1501 
1502 	while (old->next != NULL)
1503 	{
1504 		old = old->next;
1505 	}
1506 
1507 	old->next = new;
1508 
1509 	return new;
1510 }
1511 
1512 GapXEditScriptPtr LIBCALL
GapXEditScriptDelete(GapXEditScriptPtr old)1513 GapXEditScriptDelete(GapXEditScriptPtr old)
1514 {
1515 	GapXEditScriptPtr next;
1516 
1517 	while (old)
1518 	{
1519 		next = old->next;
1520 		old = MemFree(old);
1521 		old = next;
1522 	}
1523 	return old;
1524 }
1525 
1526 GapXEditBlockPtr LIBCALL
GapXEditBlockNew(Int4 start1,Int4 start2)1527 GapXEditBlockNew(Int4 start1, Int4 start2)
1528 
1529 {
1530 	GapXEditBlockPtr edit_block;
1531 
1532 	edit_block = (GapXEditBlockPtr) MemNew(sizeof(GapXEditBlock));
1533 	edit_block->start1 = start1;
1534 	edit_block->start2 = start2;
1535 
1536 	return edit_block;
1537 }
1538 
1539 GapXEditBlockPtr LIBCALL
GapXEditBlockDelete(GapXEditBlockPtr edit_block)1540 GapXEditBlockDelete(GapXEditBlockPtr edit_block)
1541 
1542 {
1543 	if (edit_block == NULL)
1544 		return NULL;
1545 
1546 	edit_block->esp = GapXEditScriptDelete(edit_block->esp);
1547 
1548 	edit_block = MemFree(edit_block);
1549 
1550 	return edit_block;
1551 }
1552 
1553 /* Alignment display routine */
1554 
1555 GapXEditBlockPtr LIBCALL
TracebackToGapXEditBlock(Uint1Ptr A,Uint1Ptr B,Int4 M,Int4 N,Int4Ptr S,Int4 start1,Int4 start2)1556 TracebackToGapXEditBlock(Uint1Ptr A, Uint1Ptr B, Int4 M, Int4 N, Int4Ptr S, Int4 start1, Int4 start2)
1557 {
1558 
1559   register Int4 i, j, op, number_of_subs, number_of_decline;
1560   GapXEditScriptPtr e_script;
1561   GapXEditBlockPtr edit_block;
1562 
1563   if (S == NULL)
1564 	return NULL;
1565 
1566   i = j = op = number_of_subs = number_of_decline = 0;
1567 
1568   edit_block = GapXEditBlockNew(start1, start2);
1569   edit_block->esp = e_script = GapXEditScriptNew(NULL);
1570 
1571   while(i < M || j < N)
1572   {
1573 	op = *S;
1574 	if (op != MININT && number_of_decline > 0)
1575 	{
1576                e_script->op_type = GAPALIGN_DECLINE;
1577                e_script->num = number_of_decline;
1578                e_script = GapXEditScriptNew(e_script);
1579 		number_of_decline = 0;
1580 	}
1581         if (op != 0 && number_of_subs > 0)
1582 	{
1583                         e_script->op_type = GAPALIGN_SUB;
1584                         e_script->num = number_of_subs;
1585                         e_script = GapXEditScriptNew(e_script);
1586                         number_of_subs = 0;
1587         }
1588 	if (op == 0) {
1589 		i++; j++; number_of_subs++;
1590 	} else if (op == MININT) {
1591 		i++; j++;
1592 		number_of_decline++;
1593 	}
1594 	else
1595 	{
1596 		if(op > 0)
1597 		{
1598 			e_script->op_type = GAPALIGN_DEL;
1599 			e_script->num = op;
1600 			j += op;
1601 			if (i < M || j < N)
1602                                 e_script = GapXEditScriptNew(e_script);
1603 		}
1604 		else
1605 		{
1606 			e_script->op_type = GAPALIGN_INS;
1607 			e_script->num = ABS(op);
1608 			i += ABS(op);
1609 			if (i < M || j < N)
1610                                 e_script = GapXEditScriptNew(e_script);
1611 		}
1612     	}
1613 	S++;
1614   }
1615 
1616   if (number_of_subs > 0)
1617   {
1618 	e_script->op_type = GAPALIGN_SUB;
1619 	e_script->num = number_of_subs;
1620   } else if (number_of_decline > 0) {
1621         e_script->op_type = GAPALIGN_DECLINE;
1622         e_script->num = number_of_decline;
1623   }
1624 
1625   return edit_block;
1626 }
1627 /*
1628   Converts a OOF traceback from the gapped alignment to a
1629   GapXEditBlock.
1630 
1631   This function is for out-of-frame gapping:
1632 
1633   index1 is for the protein, index2 is for the nucleotides.
1634 
1635   0: deletion of a dna codon, i.e dash aligned with a protein letter.
1636   1: one nucleotide vs a protein, deletion of 2 nuc.
1637   2: 2 nucleotides aligned with a protein, i.e deletion of one nuc.
1638   3: substitution, 3 nucleotides vs an amino acid.
1639   4: 4 nuc vs an amino acid.
1640   5: 5 nuc vs an amino acid.
1641   6: a codon aligned with a dash. opposite of 0.
1642 */
1643 
1644 GapXEditBlockPtr LIBCALL
OOFTracebackToGapXEditBlock(Int4 M,Int4 N,Int4Ptr S,Int4 start1,Int4 start2)1645 OOFTracebackToGapXEditBlock(Int4 M, Int4 N, Int4Ptr S, Int4 start1, Int4 start2)
1646 {
1647     register Int4 current_val, last_val, number, index1, index2;
1648     GapXEditScriptPtr e_script;
1649     GapXEditBlockPtr edit_block;
1650 
1651     if (S == NULL)
1652         return NULL;
1653 
1654     number = 0;
1655     index1 = 0;
1656     index2 = 0;
1657 
1658     edit_block = GapXEditBlockNew(start1, start2);
1659     edit_block->is_ooframe = TRUE;
1660     edit_block->esp = e_script = GapXEditScriptNew(NULL);
1661 
1662     last_val = *S;
1663 
1664     /* index1, M - index and length of protein,
1665        index2, N - length of the nucleotide */
1666 
1667     for(index1 = 0, index2 = 0; index1 < M && index2 < N; S++, number++) {
1668 
1669         current_val = *S;
1670         /* New script element will be created for any new frameshift
1671            region  0,3,6 will be collected in a single segment */
1672         if (current_val != last_val || (current_val%3 != 0 &&
1673                                         edit_block->esp != e_script)) {
1674             e_script->num = number;
1675             e_script = GapXEditScriptNew(e_script);
1676 
1677             /* if(last_val%3 != 0 && current_val%3 == 0) */
1678             if(last_val%3 != 0 && current_val == 3)
1679                 /* 1, 2, 4, 5 vs. 0, 3, 6*/
1680                 number = 1;
1681             else
1682                 number = 0;
1683         }
1684 
1685         last_val = current_val;
1686 
1687         /* for out_of_frame == TRUE - we have op_type == S parameter */
1688         e_script->op_type = current_val;
1689 
1690         if(current_val != 6) {
1691             index1++;
1692             index2 += current_val;
1693         } else {
1694             index2 += 3;
1695         }
1696     }
1697     /* Get the last one. */
1698     e_script->num = number;
1699 
1700     return edit_block;
1701 }
1702 
1703 /*
1704 	Destruct Function for GapAlignBlk.  If "state" is not NULL, then
1705 	it's deallocated.
1706 */
1707 GapAlignBlkPtr LIBCALL
GapAlignBlkDelete(GapAlignBlkPtr gap_align)1708 GapAlignBlkDelete(GapAlignBlkPtr gap_align)
1709 
1710 {
1711     if (gap_align == NULL)
1712         return NULL;
1713 
1714     gap_align->state_struct = GapXDropStateDestroy(gap_align->state_struct);
1715     /* GapXEditBlockDelete(gap_align->edit_block); */
1716 
1717     gap_align->dyn_prog = MemFree(gap_align->dyn_prog);
1718     gap_align = MemFree(gap_align);
1719 
1720     return gap_align;
1721 }
1722 
1723 /*
1724 	Allocates GapAlignBlkPtr and "state", if state_column_length and
1725 	state_row_length are not NULL.
1726 
1727 	For "call and forget" applications, state_column_length and
1728 	state_row_length should both be set to zero.  ALIGN will
1729 	then allocate and deallocate this memory.
1730 */
1731 
1732 GapAlignBlkPtr LIBCALL
GapAlignBlkNew(Int4 state_column_length,Int4 state_row_length)1733 GapAlignBlkNew(Int4 state_column_length, Int4 state_row_length)
1734 
1735 {
1736 	GapAlignBlkPtr gap_align;
1737 
1738 	gap_align = MemNew(sizeof(GapAlignBlk));
1739 	if (gap_align == NULL)
1740 		return NULL;
1741 
1742 	/* gap_align->decline_align = INT2_MAX; */
1743 	return gap_align;
1744 }
1745 
1746 Boolean LIBCALL
PerformNtGappedAlignment(GapAlignBlkPtr gap_align)1747 PerformNtGappedAlignment(GapAlignBlkPtr gap_align)
1748 
1749 {
1750 	Boolean found_start, found_end;
1751 	Int4 q_length=0, s_length=0, middle_score, score_right, score_left, private_q_start, private_s_start;
1752 	Int4 include_query=0, index;
1753 	Uint1Ptr query, subject, query_var, subject_var;
1754 
1755 	if (gap_align == NULL)
1756 		return FALSE;
1757 
1758 	found_start = FALSE;
1759 	found_end = FALSE;
1760 
1761 	query = gap_align->query;
1762 	subject = gap_align->subject;
1763 	score_left = 0;
1764 	if (gap_align->q_start != 0 && gap_align->s_start != 0)
1765 	{
1766 		found_start = TRUE;
1767 		q_length = (gap_align->q_start + 1);
1768 		s_length = (gap_align->s_start + 1);
1769 		score_left = ALIGN_packed_nucl(query, subject, q_length, s_length, &private_q_start, &private_s_start, gap_align, gap_align->q_start, TRUE);
1770                 if (score_left < 0)
1771                    return FALSE;
1772 		gap_align->query_start = q_length - private_q_start;
1773 		gap_align->subject_start = s_length - private_s_start;
1774 	}
1775 	else
1776 	{
1777 		q_length = gap_align->q_start;
1778 		s_length = gap_align->s_start;
1779 	}
1780 
1781 	middle_score = 0;
1782 	query_var = query+gap_align->q_start;
1783 	subject_var = subject+gap_align->s_start;
1784 	for (index=0; index<include_query; index++)
1785 	{
1786 		query_var++;
1787 		subject_var++;
1788 		if (!(gap_align->positionBased))  /*AAS*/
1789 		  middle_score += gap_align->matrix[*query_var][*subject_var];
1790 		else
1791 		  middle_score += MtrxScoreGapAlign(gap_align,
1792 				gap_align->q_start+1 + index,*subject_var);
1793 	}
1794 
1795 	score_right = 0;
1796 	if (gap_align->q_start+include_query < gap_align->query_length && gap_align->s_start+include_query < gap_align->subject_length)
1797 	{
1798 		found_end = TRUE;
1799 		score_right = ALIGN_packed_nucl(query+gap_align->q_start+include_query, subject+(gap_align->s_start+include_query)/4, gap_align->query_length-q_length-include_query, gap_align->subject_length-s_length-include_query, &(gap_align->query_stop), &(gap_align->subject_stop), gap_align, gap_align->q_start+include_query, FALSE);
1800                 if (score_right < 0)
1801                    return FALSE;
1802 		gap_align->query_stop += gap_align->q_start+include_query;
1803 		gap_align->subject_stop += gap_align->s_start+include_query;
1804 	}
1805 
1806 	if (found_start == FALSE)
1807 	{	/* Start never found */
1808 		gap_align->query_start = gap_align->q_start;
1809 		gap_align->subject_start = gap_align->s_start;
1810 	}
1811 
1812 	if (found_end == FALSE)
1813 	{
1814 		gap_align->query_stop = gap_align->q_start + include_query - 1;
1815 		gap_align->subject_stop = gap_align->s_start + include_query - 1;
1816 	}
1817 
1818 	gap_align->score = score_right+score_left+middle_score;
1819 
1820 	return TRUE;
1821 }
1822 
1823 Boolean LIBCALL
PerformGappedAlignment(GapAlignBlkPtr gap_align)1824 PerformGappedAlignment(GapAlignBlkPtr gap_align)
1825 
1826 {
1827     Boolean found_start, found_end;
1828     Int4 q_length=0, s_length=0, middle_score, score_right, score_left, private_q_start, private_s_start;
1829     Int4 include_query, index;
1830     Uint1Ptr q_left=NULL, s_left=NULL;
1831     Uint1Ptr query, subject, query_var, subject_var;
1832 
1833     if (gap_align == NULL)
1834         return FALSE;
1835 
1836     found_start = FALSE;
1837     found_end = FALSE;
1838 
1839     query = gap_align->query;
1840     subject = gap_align->subject;
1841     include_query = gap_align->include_query;
1842 
1843     /* Looking for "left" score */
1844     score_left = 0;
1845     if (gap_align->q_start != 0 && gap_align->s_start != 0) {
1846         found_start = TRUE;
1847         if(gap_align->is_ooframe) {
1848             q_left = (Uint1Ptr) MemNew((gap_align->q_start+3)*sizeof(Uint1));
1849             s_left = (Uint1Ptr) MemNew((gap_align->s_start+5)*sizeof(Uint1));
1850 
1851             q_length = reverse_seq(query,
1852                                    query+gap_align->q_start-1, q_left+1);
1853             s_length = reverse_seq(subject,
1854                                    subject+gap_align->s_start-3, s_left+3);
1855 
1856             score_left = OOF_SEMI_G_ALIGN(q_left, s_left+2, q_length, s_length, NULL, &private_q_start, &private_s_start, TRUE, NULL, gap_align, gap_align->q_start, TRUE);
1857             q_left = MemFree(q_left);
1858             s_left = MemFree(s_left);
1859         } else {
1860             q_length = (gap_align->q_start+1);
1861             s_length = (gap_align->s_start+1);
1862             score_left = SEMI_G_ALIGN_EX(query, subject, q_length, s_length, NULL, &private_q_start, &private_s_start, TRUE, NULL, gap_align, gap_align->q_start, FALSE, TRUE);
1863         }
1864 
1865         gap_align->query_start = q_length - private_q_start;
1866         gap_align->subject_start = s_length - private_s_start;
1867 
1868     } else {
1869         q_length = gap_align->q_start;
1870         s_length = gap_align->s_start;
1871     }
1872 
1873     /* Looking for "middle" score */
1874     middle_score = 0;
1875     query_var = query+gap_align->q_start;
1876     subject_var = subject+gap_align->s_start;
1877     for (index=0; index<include_query; index++) {
1878         query_var++;
1879         subject_var++;
1880         if (!(gap_align->positionBased))  /*AAS*/
1881             middle_score += gap_align->matrix[*query_var][*subject_var];
1882         else
1883             middle_score += MtrxScoreGapAlign(gap_align,
1884                                               gap_align->q_start+1 + index,*subject_var);
1885     }
1886 
1887     score_right = 0;
1888     if (gap_align->q_start+include_query < gap_align->query_length && gap_align->s_start+include_query < gap_align->subject_length) {
1889         found_end = TRUE;
1890         if(gap_align->is_ooframe) {
1891             score_right = OOF_SEMI_G_ALIGN(query+gap_align->q_start+include_query-1, subject+gap_align->s_start+include_query-1, gap_align->query_length-q_length-include_query, gap_align->subject_length-s_length-include_query, NULL, &(gap_align->query_stop), &(gap_align->subject_stop), TRUE, NULL, gap_align, gap_align->q_start+include_query, FALSE);
1892         } else {
1893             score_right = SEMI_G_ALIGN_EX(query+gap_align->q_start+include_query, subject+gap_align->s_start+include_query, gap_align->query_length-q_length-include_query, gap_align->subject_length-s_length-include_query, NULL, &(gap_align->query_stop), &(gap_align->subject_stop), TRUE, NULL, gap_align, gap_align->q_start+include_query, FALSE, FALSE);
1894         }
1895 
1896         gap_align->query_stop += gap_align->q_start+include_query;
1897         gap_align->subject_stop += gap_align->s_start+include_query;
1898     }
1899 
1900     if (found_start == FALSE) {	/* Start never found */
1901         gap_align->query_start = gap_align->q_start;
1902         gap_align->subject_start = gap_align->s_start;
1903     }
1904 
1905     if (found_end == FALSE) {
1906         gap_align->query_stop = gap_align->q_start + include_query - 1;
1907         gap_align->subject_stop = gap_align->s_start + include_query - 1;
1908 
1909         if(gap_align->is_ooframe) {  /* Do we really need this ??? */
1910             gap_align->query_stop++;
1911             gap_align->subject_stop++;
1912         }
1913     }
1914 
1915     gap_align->score = score_right+score_left+middle_score;
1916 
1917     return TRUE;
1918 }
1919 
1920 /*
1921 	Perform a gapped alignment and return the score.  A SeqAlignPtr with
1922 	the traceback is also returned.
1923 */
1924 Boolean LIBCALL
PerformGappedAlignmentWithTraceback(GapAlignBlkPtr gap_align)1925 PerformGappedAlignmentWithTraceback(GapAlignBlkPtr gap_align)
1926 {
1927     Boolean found_start, found_end;
1928     Int4 q_length=0, s_length=0, score_right, middle_score, score_left, private_q_length, private_s_length, tmp;
1929     Int4 include_query, index, prev;
1930     Int4Ptr tback, tback1, p, q;
1931     Uint1Ptr q_left=NULL, s_left=NULL;
1932     Uint1Ptr query, subject, query_var, subject_var;
1933 
1934     if (gap_align == NULL)
1935         return FALSE;
1936 
1937     found_start = FALSE;
1938     found_end = FALSE;
1939 
1940     query = gap_align->query;
1941     subject = gap_align->subject;
1942 
1943     /*
1944        tback = tback1 = MemNew((gap_align->subject_length+gap_align->query_length)*sizeof(Int4));
1945     */
1946     tback = tback1 =
1947        Nlm_Malloc((gap_align->subject_length+gap_align->query_length)*sizeof(Int4));
1948     include_query = gap_align->include_query;
1949 
1950     gap_align->tback = tback;
1951 
1952     /* Note: the starting point [q_start,s_start] is included in the left
1953        extension, but not in the right extension */
1954     score_left = 0; prev = 3;
1955     found_start = TRUE;
1956 
1957     if(gap_align->is_ooframe) {
1958        q_left = (Uint1Ptr) MemNew((gap_align->q_start+3)*sizeof(Uint1));
1959        s_left = (Uint1Ptr) MemNew((gap_align->s_start+5)*sizeof(Uint1));
1960 
1961        q_length = reverse_seq(query,
1962                               query+gap_align->q_start-1, q_left+1);
1963        if (gap_align->s_start >= 3) {
1964           s_length = reverse_seq(subject,
1965                                  subject+gap_align->s_start-3, s_left+3);
1966        } else {
1967           s_length = gap_align->s_start - 2;
1968        }
1969        score_left = OOF_SEMI_G_ALIGN(q_left, s_left+2, q_length, s_length, tback, &private_q_length, &private_s_length, FALSE, &tback1, gap_align, gap_align->q_start, TRUE);
1970 
1971        q_left = MemFree(q_left);
1972        s_left = MemFree(s_left);
1973     } else {
1974        q_length = (gap_align->q_start+1);
1975        s_length = (gap_align->s_start+1);
1976 
1977        score_left = SEMI_G_ALIGN_EX(query, subject, q_length, s_length, tback, &private_q_length, &private_s_length, FALSE, &tback1, gap_align, gap_align->q_start, FALSE, TRUE);
1978     }
1979 
1980     for(p = tback, q = tback1 - 1; p < q; p++, q--)  {
1981        tmp = *p;
1982        *p = *q;
1983        *q = tmp;
1984     }
1985 
1986     if(gap_align->is_ooframe){
1987        for (prev = 3, p = tback; p < tback1; p++) {
1988           if (*p == 0 || *p ==  6) continue;
1989           tmp = *p; *p = prev; prev = tmp;
1990        }
1991     }
1992     gap_align->query_start = q_length - private_q_length;
1993     gap_align->subject_start = s_length - private_s_length;
1994 
1995     middle_score = 0;
1996     query_var = query+gap_align->q_start;
1997     subject_var = subject+gap_align->s_start;
1998     for (index=0; index<include_query; index++) {
1999         query_var++;
2000         subject_var++;
2001         if (!(gap_align->positionBased))  /*AAS*/
2002             middle_score += gap_align->matrix[*query_var][*subject_var];
2003         else
2004             middle_score += MtrxScoreGapAlign(gap_align,
2005                                               gap_align->q_start+1 + index,*subject_var);
2006         *tback1 = 0;
2007         tback1++;
2008     }
2009 
2010     score_right = 0;
2011     if ((gap_align->q_start+include_query) < gap_align->query_length && (gap_align->s_start+include_query) < gap_align->subject_length) {
2012         found_end = TRUE;
2013         if(gap_align->is_ooframe){
2014             score_right = OOF_SEMI_G_ALIGN(query+gap_align->q_start+include_query-1, subject+gap_align->s_start+include_query-1, gap_align->query_length-q_length-include_query, gap_align->subject_length-s_length-include_query, tback1, &private_q_length, &private_s_length, FALSE, &tback1, gap_align, gap_align->q_start+include_query, FALSE);
2015             if (prev != 3) {
2016                 while (*p == 0 || *p == 6) p++;
2017                 *p = prev+*p-3;
2018             }
2019         } else {
2020             score_right = SEMI_G_ALIGN_EX(query+gap_align->q_start+include_query, subject+gap_align->s_start+include_query, gap_align->query_length-q_length-include_query, gap_align->subject_length-s_length-include_query, tback1, &private_q_length, &private_s_length, FALSE, &tback1, gap_align, gap_align->q_start+include_query, FALSE, FALSE);
2021         }
2022 
2023         gap_align->query_stop = gap_align->q_start + private_q_length+include_query;
2024         gap_align->subject_stop = gap_align->s_start + private_s_length+include_query;
2025     }
2026 
2027     if (found_start == FALSE) {	/* Start never found */
2028         gap_align->query_start = gap_align->q_start;
2029         gap_align->subject_start = gap_align->s_start;
2030     }
2031 
2032     if (found_end == FALSE) {
2033         gap_align->query_stop = gap_align->q_start + include_query - 1;
2034         gap_align->subject_stop = gap_align->s_start + include_query - 1;
2035 
2036         if(FALSE && gap_align->is_ooframe){ /* Do we really need this ??? */
2037             gap_align->query_stop++;
2038             gap_align->subject_stop++;
2039         }
2040     }
2041 
2042     if(gap_align->is_ooframe) {
2043 	gap_align->edit_block = OOFTracebackToGapXEditBlock(gap_align->query_stop-gap_align->query_start+1, gap_align->subject_stop-gap_align->subject_start+1, tback, gap_align->query_start, gap_align->subject_start);
2044     } else {
2045         gap_align->edit_block = TracebackToGapXEditBlock(query, subject, gap_align->query_stop-gap_align->query_start+1, gap_align->subject_stop-gap_align->subject_start+1, tback, gap_align->query_start, gap_align->subject_start);
2046     }
2047 
2048     gap_align->edit_block->frame1 = gap_align->query_frame;
2049     gap_align->edit_block->frame2 = gap_align->subject_frame;
2050     gap_align->edit_block->length1 = gap_align->query_length;
2051     gap_align->edit_block->length2 = gap_align->subject_length;
2052     gap_align->edit_block->translate1 = gap_align->translate1;
2053     gap_align->edit_block->translate2 = gap_align->translate2;
2054     gap_align->edit_block->discontinuous = gap_align->discontinuous;
2055 
2056 #ifdef OOF_DEBUG_PRINTOUT
2057     {{
2058         CharPtr pro_seq, dnap_seq;
2059         SeqMapTablePtr smtp;
2060         Int4 i;
2061 
2062         pro_seq = MemNew(gap_align->query_length+1);
2063         dnap_seq = MemNew(gap_align->subject_length+3*CODON_LENGTH);
2064 
2065         smtp = SeqMapTableFindObj(Seq_code_ncbieaa, Seq_code_ncbistdaa);
2066 
2067         for(i = 0; i < gap_align->query_length; i++)
2068             pro_seq[i] = SeqMapTableConvert(smtp, query[i]);
2069 
2070         for(i = 0; i < gap_align->subject_length; i++)
2071             dnap_seq[i] = SeqMapTableConvert(smtp, subject[i]);
2072 
2073         /* for(i =0; i <gap_align->subject_length+
2074            gap_align->query_length; i++)
2075            printf("%d ", tback[i]); */
2076 
2077         OOFDisplayTraceBack1(tback, dnap_seq, pro_seq,
2078                              gap_align->subject_stop-1,
2079                              gap_align->query_stop-1,
2080                              gap_align->query_start,
2081                              gap_align->subject_start);
2082         MemFree(dnap_seq);
2083         MemFree(pro_seq);
2084     }}
2085 #endif
2086 
2087     tback = MemFree(tback);
2088     gap_align->tback = NULL;
2089 
2090     gap_align->score = score_right+score_left+middle_score;
2091 
2092     return TRUE;
2093 }
2094 
2095 /*
2096 	Get the current position.
2097 */
2098 
get_current_pos(Int4Ptr pos,Int4 length)2099 static Int4 get_current_pos(Int4Ptr pos, Int4 length)
2100 {
2101     Int4 val;
2102     if(*pos < 0)
2103         val = -(*pos + length -1);
2104     else
2105         val = *pos;
2106     *pos += length;
2107     return val;
2108 }
2109 
GXEMakeSeqAlign(SeqIdPtr query_id,SeqIdPtr subject_id,Boolean reverse,Boolean translate1,Boolean translate2,Int4 numseg,Int4Ptr length,Int4Ptr start,Uint1Ptr strands)2110 static SeqAlignPtr GXEMakeSeqAlign(SeqIdPtr query_id, SeqIdPtr subject_id,
2111                                    Boolean reverse, Boolean translate1,
2112                                    Boolean translate2, Int4 numseg,
2113                                    Int4Ptr length, Int4Ptr start,
2114                                    Uint1Ptr strands)
2115 {
2116     SeqAlignPtr sap;
2117     DenseSegPtr dsp;
2118     StdSegPtr sseg, sseg_head, sseg_old;
2119     SeqLocPtr slp, slp1, slp2;
2120     SeqIntPtr seq_int1;
2121     Int4 index;
2122     Boolean tmp_value;
2123 
2124     sap = SeqAlignNew();
2125 
2126     sap->dim =2; /**only two dimention alignment**/
2127 
2128     /**make the Denseg Object for SeqAlign**/
2129     if (translate1 == FALSE && translate2 == FALSE) {
2130         sap->segtype = SAS_DENSEG; /** use denseg to store the alignment **/
2131         sap->type = SAT_PARTIAL;   /**partial for gapped translating search.*/
2132         dsp = DenseSegNew();
2133         dsp->dim = 2;
2134         dsp->numseg = numseg;
2135         if (reverse) {
2136             dsp->ids = SeqIdDup(subject_id);
2137             dsp->ids->next = SeqIdDup(query_id);
2138         } else {
2139             dsp->ids = SeqIdDup(query_id);
2140             dsp->ids->next = SeqIdDup(subject_id);
2141         }
2142         dsp->starts = start;
2143         dsp->strands = strands;
2144         dsp->lens = length;
2145         sap->segs = dsp;
2146         sap->next = NULL;
2147     } else { /****/
2148         sap->type = SAT_PARTIAL; /**partial for gapped translating search. */
2149         sap->segtype = SAS_STD;  /**use stdseg to store the alignment**/
2150         sseg_head = NULL;
2151         sseg_old = NULL;
2152 
2153         if(reverse) {  /* Reverting translation flags */
2154             tmp_value = translate1;
2155             translate1 = translate2;
2156             translate2 = tmp_value;
2157         }
2158 
2159         for (index=0; index<numseg; index++) {
2160             sseg = StdSegNew();
2161             sseg->dim = 2;
2162             if (sseg_head == NULL) {
2163                 sseg_head = sseg;
2164             }
2165             if (reverse) {
2166                 sseg->ids = SeqIdDup(subject_id);
2167                 sseg->ids->next = SeqIdDup(query_id);
2168             } else {
2169                 sseg->ids = SeqIdDup(query_id);
2170                 sseg->ids->next = SeqIdDup(subject_id);
2171             }
2172 
2173             slp1 = NULL;
2174             if (start[2*index] != -1) {
2175                 seq_int1 = SeqIntNew();
2176                 seq_int1->from = start[2*index];
2177                 if (translate1)
2178                     seq_int1->to = start[2*index] + CODON_LENGTH*length[index] - 1;
2179                 else
2180                     seq_int1->to = start[2*index] + length[index] - 1;
2181                 seq_int1->strand = strands[2*index];
2182 
2183                 if(reverse) {
2184                     seq_int1->id = SeqIdDup(subject_id);
2185                 } else {
2186                     seq_int1->id = SeqIdDup(query_id);
2187                 }
2188 
2189                 ValNodeAddPointer(&slp1, SEQLOC_INT, seq_int1);
2190             } else {
2191                 if(reverse) {
2192                     ValNodeAddPointer(&slp1, SEQLOC_EMPTY, SeqIdDup(subject_id));
2193                 } else {
2194                     ValNodeAddPointer(&slp1, SEQLOC_EMPTY, SeqIdDup(query_id));
2195                 }
2196             }
2197             slp2 = NULL;
2198             if (start[2*index+1] != -1) {
2199                 seq_int1 = SeqIntNew();
2200                 seq_int1->from = start[2*index+1];
2201                 if (translate2)
2202                     seq_int1->to = start[2*index+1] + CODON_LENGTH*length[index] - 1;
2203                 else
2204                     seq_int1->to = start[2*index+1] + length[index] - 1;
2205                 seq_int1->strand = strands[2*index+1];
2206 
2207                 if(reverse) {
2208                     seq_int1->id = SeqIdDup(query_id);
2209                 } else {
2210                     seq_int1->id = SeqIdDup(subject_id);
2211                 }
2212 
2213                 ValNodeAddPointer(&slp2, SEQLOC_INT, seq_int1);
2214             } else {
2215                 if(reverse) {
2216                     ValNodeAddPointer(&slp2, SEQLOC_EMPTY, SeqIdDup(query_id));
2217                 } else {
2218                     ValNodeAddPointer(&slp2, SEQLOC_EMPTY, SeqIdDup(subject_id));
2219                 }
2220             }
2221             /*
2222               if (reverse) {
2223               slp = slp2;
2224               slp2->next = slp1;
2225               } else {
2226               slp = slp1;
2227               slp1->next = slp2;
2228               } */
2229 
2230             slp = slp1;
2231             slp1->next = slp2;
2232 
2233             sseg->loc = slp;
2234 
2235             if (sseg_old)
2236                 sseg_old->next = sseg;
2237             sseg_old = sseg;
2238         }
2239         sap->segs = sseg_head;
2240         sap->next = NULL;
2241 
2242         MemFree(start);
2243         MemFree(length);
2244         MemFree(strands);
2245     }
2246 
2247     return sap;
2248 }
2249 
GXECollectDataForSeqalign(GapXEditBlockPtr edit_block,GapXEditScriptPtr curr_in,Int4 numseg,Int4Ptr PNTR start_out,Int4Ptr PNTR length_out,Uint1Ptr PNTR strands_out,Int4Ptr start1,Int4Ptr start2)2250 Boolean GXECollectDataForSeqalign(GapXEditBlockPtr edit_block,
2251                                   GapXEditScriptPtr curr_in, Int4 numseg,
2252                                   Int4Ptr PNTR start_out,
2253                                   Int4Ptr PNTR length_out,
2254                                   Uint1Ptr PNTR strands_out,
2255                                   Int4Ptr start1, Int4Ptr start2)
2256 {
2257     GapXEditScriptPtr curr;
2258     Boolean reverse, translate1, translate2;
2259     Int2 frame1, frame2;
2260     Int4 begin1, begin2, index, length1, length2;
2261     Int4 original_length1, original_length2, i;
2262     Int4Ptr length, start;
2263     Uint1 strand1, strand2;
2264     Uint1Ptr strands;
2265 
2266     reverse = edit_block->reverse;
2267     length1 = edit_block->length1;
2268     length2 = edit_block->length2;
2269     original_length1 = edit_block->original_length1;
2270     original_length2 = edit_block->original_length2;
2271     translate1 = edit_block->translate1;
2272     translate2 = edit_block->translate2;
2273     frame1 = edit_block->frame1;
2274     frame2 = edit_block->frame2;
2275 
2276     if (frame1 > 0)
2277         strand1 = Seq_strand_plus;
2278     else if (frame1 < 0)
2279         strand1 = Seq_strand_minus;
2280     else
2281         strand1 = Seq_strand_unknown;
2282 
2283     if (frame2 > 0)
2284         strand2 = Seq_strand_plus;
2285     else if (frame2 < 0)
2286         strand2 = Seq_strand_minus;
2287     else
2288         strand2 = Seq_strand_unknown;
2289 
2290     start = MemNew((2*numseg+1)*sizeof(Int4));
2291     *start_out = start;
2292     length = MemNew((numseg+1)*sizeof(Int4));
2293     *length_out = length;
2294     strands = MemNew((2*numseg+1)*sizeof(Uint1));
2295     *strands_out = strands;
2296 
2297     index=0;
2298     for (i = 0, curr=curr_in; curr && i < numseg; curr=curr->next, i++) {
2299         switch(curr->op_type) {
2300         case GAPALIGN_DECLINE:
2301         case GAPALIGN_SUB:
2302             if (strand1 != Seq_strand_minus) {
2303                 if(translate1 == FALSE)
2304                     begin1 = get_current_pos(start1, curr->num);
2305                 else
2306                     begin1 = frame1 - 1 + CODON_LENGTH*get_current_pos(start1, curr->num);
2307             } else {
2308                 if(translate1 == FALSE)
2309                     begin1 = length1 - get_current_pos(start1, curr->num) - curr->num;
2310                 else
2311                     begin1 = original_length1 - CODON_LENGTH*(get_current_pos(start1, curr->num)+curr->num) + frame1 + 1;
2312             }
2313 
2314             if (strand2 != Seq_strand_minus) {
2315                 if(translate2 == FALSE)
2316                     begin2 = get_current_pos(start2, curr->num);
2317                 else
2318                     begin2 = frame2 - 1 + CODON_LENGTH*get_current_pos(start2, curr->num);
2319             } else {
2320                 if(translate2 == FALSE)
2321                     begin2 = length2 - get_current_pos(start2, curr->num) - curr->num;
2322                 else
2323                     begin2 = original_length2 - CODON_LENGTH*(get_current_pos(start2, curr->num)+curr->num) + frame2 + 1;
2324             }
2325 
2326             if (reverse) {
2327                 strands[2*index] = strand2;
2328                 strands[2*index+1] = strand1;
2329                 start[2*index] = begin2;
2330                 start[2*index+1] = begin1;
2331             } else {
2332                 strands[2*index] = strand1;
2333                 strands[2*index+1] = strand2;
2334                 start[2*index] = begin1;
2335                 start[2*index+1] = begin2;
2336             }
2337 
2338             break;
2339 
2340         case GAPALIGN_DEL:
2341             begin1 = -1;
2342             if (strand2 != Seq_strand_minus) {
2343                 if(translate2 == FALSE)
2344                     begin2 = get_current_pos(start2, curr->num);
2345                 else
2346                     begin2 = frame2 - 1 + CODON_LENGTH*get_current_pos(start2, curr->num);
2347             } else {
2348                 if(translate2 == FALSE)
2349                     begin2 = length2 - get_current_pos(start2, curr->num) - curr->num;
2350                 else
2351                     begin2 = original_length2 - CODON_LENGTH*(get_current_pos(start2, curr->num)+curr->num) + frame2 + 1;
2352             }
2353 
2354             if (reverse) {
2355                 strands[2*index] = strand2;
2356                 if (index > 0)
2357                     strands[2*index+1] = strands[2*(index-1)+1];
2358                 else
2359                     strands[2*index+1] = Seq_strand_unknown;
2360                 start[2*index] = begin2;
2361                 start[2*index+1] = begin1;
2362             } else {
2363                 if (index > 0)
2364                     strands[2*index] = strands[2*(index-1)];
2365                 else
2366                     strands[2*index] = Seq_strand_unknown;
2367                 strands[2*index+1] = strand2;
2368                 start[2*index] = begin1;
2369                 start[2*index+1] = begin2;
2370             }
2371 
2372             break;
2373 
2374         case GAPALIGN_INS:
2375             if (strand1 != Seq_strand_minus) {
2376                 if(translate1 == FALSE)
2377                     begin1 = get_current_pos(start1, curr->num);
2378                 else
2379                     begin1 = frame1 - 1 + CODON_LENGTH*get_current_pos(start1, curr->num);
2380             } else {
2381                 if(translate1 == FALSE)
2382                     begin1 = length1 - get_current_pos(start1, curr->num) - curr->num;
2383                 else
2384                     begin1 = original_length1 - CODON_LENGTH*(get_current_pos(start1, curr->num)+curr->num) + frame1 + 1;
2385             }
2386             begin2 = -1;
2387             if (reverse) {
2388                 if (index > 0)
2389                     strands[2*index] = strands[2*(index-1)];
2390                 else
2391                     strands[2*index] = Seq_strand_unknown;
2392                 strands[2*index+1] = strand1;
2393                 start[2*index] = begin2;
2394                 start[2*index+1] = begin1;
2395             } else {
2396                 strands[2*index] = strand1;
2397                 if (index > 0)
2398                     strands[2*index+1] = strands[2*(index-1)+1];
2399                 else
2400                     strands[2*index+1] = Seq_strand_unknown;
2401                 start[2*index] = begin1;
2402                 start[2*index+1] = begin2;
2403             }
2404 
2405             break;
2406         default:
2407             break;
2408         }
2409         length[index] = curr->num;
2410         index++;
2411     }
2412 
2413     return TRUE;
2414 }
2415 
GXECorrectUASequence(GapXEditBlockPtr edit_block)2416 static void GXECorrectUASequence(GapXEditBlockPtr edit_block)
2417 {
2418     GapXEditScriptPtr curr, curr_last, curr_last2;
2419     Boolean last_indel;
2420 
2421     last_indel = FALSE;
2422     curr_last = NULL;
2423     curr_last2 = NULL;
2424 
2425     for (curr=edit_block->esp; curr; curr = curr->next) {
2426 
2427         if(curr->op_type == GAPALIGN_DECLINE && last_indel == TRUE) {
2428             /* This is invalid condition and regions should be
2429                exchanged */
2430 
2431             if(curr_last2 != NULL)
2432                 curr_last2->next = curr;
2433             else
2434                 edit_block->esp = curr; /* First element in a list */
2435 
2436             curr_last->next = curr->next;
2437             curr->next = curr_last;
2438         }
2439 
2440         last_indel = FALSE;
2441 
2442         if(curr->op_type == GAPALIGN_INS || curr->op_type == GAPALIGN_DEL) {
2443             last_indel = TRUE;
2444             curr_last2 = curr_last;
2445         }
2446 
2447         curr_last = curr;
2448     }
2449     return;
2450 }
2451 
2452 /*
2453    Convert an EditScript chain to a SeqAlign of type DenseSeg.
2454    Used for a non-simple interval (i.e., one without subs. or
2455    deletions).
2456 
2457    The first SeqIdPtr in the chain of subject_id and query_id is duplicated for
2458    the SeqAlign.
2459    */
2460 
2461 SeqAlignPtr LIBCALL
GapXEditBlockToSeqAlign(GapXEditBlockPtr edit_block,SeqIdPtr subject_id,SeqIdPtr query_id)2462 GapXEditBlockToSeqAlign(GapXEditBlockPtr edit_block, SeqIdPtr subject_id, SeqIdPtr query_id)
2463 
2464 {
2465     GapXEditScriptPtr curr, curr_last;
2466     Int4 numseg, start1, start2;
2467     Int4Ptr length, start;
2468     Uint1Ptr strands;
2469     Boolean is_disc_align = FALSE;
2470     SeqAlignPtr sap, sap_disc, sap_head, sap_tail;
2471     Boolean skip_region = FALSE;
2472 
2473     is_disc_align = FALSE; numseg = 0;
2474 
2475     for (curr=edit_block->esp; curr; curr = curr->next) {
2476         numseg++;
2477         if(/*edit_block->discontinuous && */curr->op_type == GAPALIGN_DECLINE)
2478            is_disc_align = TRUE;
2479     }
2480 
2481     start1 = edit_block->start1;
2482     start2 = edit_block->start2;
2483 
2484     /* If no GAPALIGN_DECLINE regions exists output seqalign will be
2485        regular Den-Seg or Std-seg */
2486     if(is_disc_align == FALSE) {
2487         /* Please note, that edit_block passed only for data like
2488            strand, translate, reverse etc. Real data is taken starting
2489            from "curr" and taken only "numseg" segments */
2490 
2491         GXECollectDataForSeqalign(edit_block, edit_block->esp, numseg,
2492                                   &start, &length, &strands, &start1, &start2);
2493 
2494         /* Result of this function will be either den-seg or Std-seg
2495            depending on translation options */
2496         sap = GXEMakeSeqAlign(query_id, subject_id, edit_block->reverse,
2497                               edit_block->translate1, edit_block->translate2,
2498                               numseg, length, start, strands);
2499     } else {
2500 
2501         /* By request of Steven Altschul - we need to have
2502            the unaligned part being to the left if it is adjacent to the
2503            gap (insertion or deletion) - so this function will do
2504            shaffeling */
2505 
2506         GXECorrectUASequence(edit_block);
2507 
2508         sap_disc = SeqAlignNew();
2509         sap_disc->dim = 2;
2510         sap_disc->type = SAT_PARTIAL; /* ordered segments, over part of seq */
2511         sap_disc->segtype = SAS_DISC; /* discontinuous alignment */
2512 
2513         curr_last = edit_block->esp;
2514         sap_head = NULL; sap_tail = NULL;
2515         while(curr_last != NULL) {
2516             skip_region = FALSE;
2517             for (numseg = 0, curr = curr_last;
2518                  curr; curr = curr->next, numseg++) {
2519 
2520                 if(curr->op_type == GAPALIGN_DECLINE) {
2521                     if(numseg != 0) { /* End of aligned area */
2522                         break;
2523                     } else {
2524                         while(curr && curr->op_type == GAPALIGN_DECLINE) {
2525                             numseg++;
2526                             curr = curr->next;
2527                         }
2528                         skip_region = TRUE;
2529                         break;
2530                     }
2531                 }
2532             }
2533 
2534             GXECollectDataForSeqalign(edit_block, curr_last, numseg,
2535                                       &start, &length, &strands,
2536                                       &start1, &start2);
2537 
2538             if(!skip_region) {
2539                 sap = GXEMakeSeqAlign(query_id, subject_id,
2540                                       edit_block->reverse,
2541                                       edit_block->translate1,
2542                                       edit_block->translate2,
2543                                       numseg, length, start, strands);
2544 
2545                 /* Collecting all seqaligns into single linked list */
2546                 if(sap_tail == NULL) {
2547                     sap_head = sap_tail = sap;
2548                 } else {
2549                     sap_tail->next = sap;
2550                     sap_tail = sap;
2551                 }
2552             }
2553 
2554             curr_last = curr;
2555         }
2556         sap_disc->segs = sap_head;
2557         sap = sap_disc;
2558     }
2559 
2560     return sap;
2561 }
2562 
2563 /*
2564    This function is used for Out-Of-Frame traceback conversion
2565    Convert an OOF EditScript chain to a SeqAlign of type StdSeg.
2566    Used for a non-simple interval (i.e., one without subs. or
2567    deletions).
2568 
2569    The first SeqIdPtr in the chain of subject_id and query_id is duplicated for
2570    the SeqAlign.
2571 */
OOFGapXEditBlockToSeqAlign(GapXEditBlockPtr edit_block,SeqIdPtr subject_id,SeqIdPtr query_id,Int4 query_length)2572 SeqAlignPtr LIBCALL OOFGapXEditBlockToSeqAlign(GapXEditBlockPtr edit_block, SeqIdPtr subject_id, SeqIdPtr query_id, Int4 query_length)
2573 {
2574     Boolean reverse;
2575     GapXEditScriptPtr curr, esp;
2576     Int2 frame1, frame2;
2577     Int4 start1, start2;
2578     Int4 original_length1, original_length2;
2579     SeqAlignPtr sap;
2580     SeqIntPtr seq_int1, seq_int2;
2581     SeqIntPtr seq_int1_last = NULL, seq_int2_last = NULL;
2582     SeqIdPtr sip;
2583     SeqLocPtr slp, slp1, slp2;
2584     StdSegPtr sseg, sseg_head, sseg_old;
2585     Uint1 strand1, strand2;
2586     Boolean first_shift;
2587 
2588     reverse = edit_block->reverse;
2589 
2590 
2591     start1 = edit_block->start1;
2592     start2 = edit_block->start2;
2593     frame1 = edit_block->frame1;
2594     frame2 = edit_block->frame2;
2595 
2596     original_length1 = edit_block->original_length2; /* Protein */
2597     original_length2 = edit_block->original_length1; /* DNA */
2598 
2599     /* printf("%d %d %d %d\n", start1, start2, original_length1,
2600        original_length2); */
2601 
2602     if(frame1 > 0)
2603         strand1 = Seq_strand_plus;
2604     else if (frame1 < 0)
2605         strand1 = Seq_strand_minus;
2606     else
2607         strand1 = Seq_strand_unknown;
2608 
2609     if(frame2 > 0)
2610         strand2 = Seq_strand_plus;
2611     else if (frame2 < 0)
2612         strand2 = Seq_strand_minus;
2613     else
2614         strand2 = Seq_strand_unknown;
2615 
2616     esp = edit_block->esp;
2617 
2618     sap = SeqAlignNew();
2619 
2620     sap->dim =2; /**only two dimention alignment**/
2621 
2622     sap->type =3; /**partial for gapped translating search. */
2623     sap->segtype =3; /**use denseg to store the alignment**/
2624     sseg_head = NULL;
2625     sseg_old = NULL;
2626     esp = edit_block->esp;
2627 
2628     /* query_length--; */
2629 
2630     first_shift = FALSE;
2631 
2632     for (curr=esp; curr; curr=curr->next) {
2633 
2634         slp1 = NULL;
2635         slp2 = NULL;
2636 
2637         switch (curr->op_type) {
2638         case 0: /* deletion of three nucleotides. */
2639 
2640             first_shift = FALSE;
2641 
2642             seq_int1 = SeqIntNew();
2643             seq_int1->from = get_current_pos(&start1, curr->num);
2644             seq_int1->to = start1 - 1;
2645 
2646             if(seq_int1->to >= original_length1)
2647                 seq_int1->to = original_length1-1;
2648 
2649             seq_int1->id = SeqIdDup(subject_id);
2650             seq_int1->strand = strand1;
2651 
2652             ValNodeAddPointer(&slp1, SEQLOC_INT, seq_int1);
2653 
2654             /* Empty nucleotide piece */
2655             ValNodeAddPointer(&slp2, SEQLOC_EMPTY, SeqIdDup(query_id));
2656 
2657             seq_int1_last = seq_int1;
2658             /* Keep previous seq_int2_last, in case there is a frame shift
2659                immediately after this gap */
2660 
2661             break;
2662 
2663         case 6: /* insertion of three nucleotides. */
2664 
2665             /* If gap is followed after frameshift - we have to
2666                add this element for the alignment to be correct */
2667 
2668             if(first_shift == TRUE) { /* Second frameshift in a row */
2669                 /* Protein coordinates */
2670                 seq_int1 = SeqIntNew();
2671                 seq_int1->from =  get_current_pos(&start1, 1);
2672                 seq_int1->to = start1 - 1;
2673 
2674                 if(seq_int1->to >= original_length1)
2675                     seq_int1->to = original_length1-1;
2676 
2677                 seq_int1->id = SeqIdDup(subject_id);
2678                 seq_int1->strand = strand1;
2679 
2680                 ValNodeAddPointer(&slp1, SEQLOC_INT, seq_int1);
2681 
2682                 /* Nucleotide scale shifted by op_type */
2683                 seq_int2 = SeqIntNew();
2684 
2685                 seq_int2->from = get_current_pos(&start2, 3);
2686                 seq_int2->to = start2 - 1;
2687 
2688                 if(seq_int2->to >= query_length) {/* Possible with frame shifts */
2689                     seq_int2->to = query_length -1;
2690                     seq_int1->to--;
2691                 }
2692 
2693                 /* Transfer to DNA minus strand coordinates */
2694                 if(strand2 == Seq_strand_minus) {
2695                     int tmp_int;
2696                     tmp_int = seq_int2->to;
2697                     seq_int2->to = original_length2 - seq_int2->from - 1;
2698                     seq_int2->from = original_length2 - tmp_int - 1;
2699                 }
2700 
2701                 seq_int2->id = SeqIdDup(query_id);
2702                 seq_int2->strand = strand2;
2703 
2704                 ValNodeAddPointer(&slp2, SEQLOC_INT, seq_int2);
2705 
2706                 /* seq_int1_last = seq_int1;
2707                    seq_int2_last = seq_int2; */
2708 
2709                 /* first_shift = FALSE; */
2710 
2711                 if (reverse) {
2712                     slp = slp1;
2713                     slp1->next = slp2;
2714                     sip = SeqIdDup(subject_id);
2715                     sip->next = SeqIdDup(query_id);
2716                 } else {
2717                     slp = slp2;
2718                     slp2->next = slp1;
2719                     sip = SeqIdDup(query_id);
2720                     sip->next = SeqIdDup(subject_id);
2721                 }
2722 
2723                 sseg = StdSegNew();
2724                 sseg->dim = 2;
2725 
2726                 if (sseg_head == NULL)
2727                     sseg_head = sseg;
2728 
2729                 sseg->loc = slp;
2730                 sseg->ids = sip;
2731 
2732                 if (sseg_old)
2733                     sseg_old->next = sseg;
2734 
2735                 sseg_old = sseg;
2736 
2737                 slp1 = NULL;
2738                 slp2 = NULL;
2739             }
2740 
2741             first_shift = FALSE;
2742 
2743             /* Protein piece is empty */
2744             ValNodeAddPointer(&slp1, SEQLOC_EMPTY, SeqIdDup(subject_id));
2745 
2746             /* Nucleotide scale shifted by 3, protein gapped */
2747             seq_int2 = SeqIntNew();
2748             seq_int2->from = get_current_pos(&start2, curr->num*3);
2749             seq_int2->to = start2 - 1;
2750 
2751             if(seq_int2->to >= query_length) {/* Possible with frame shifts */
2752                 seq_int2->to = query_length -1;
2753             }
2754 
2755             /* Transfer to DNA minus strand coordinates */
2756             if(strand2 == Seq_strand_minus) {
2757                 int tmp_int;
2758                 tmp_int = seq_int2->to;
2759                 seq_int2->to = original_length2 - seq_int2->from - 1;
2760                 seq_int2->from = original_length2 - tmp_int - 1;
2761             }
2762 
2763             seq_int2->id = SeqIdDup(query_id);
2764             seq_int2->strand = strand2;
2765 
2766             ValNodeAddPointer(&slp2, SEQLOC_INT, seq_int2);
2767 
2768             seq_int1_last = NULL;
2769             seq_int2_last = seq_int2; /* Will be used to adjust "to" value */
2770 
2771             break;
2772 
2773         case 3: /* Substitution. */
2774 
2775             first_shift = FALSE;
2776 
2777             /* Protein coordinates */
2778             seq_int1 = SeqIntNew();
2779             seq_int1->from =  get_current_pos(&start1, curr->num);
2780             seq_int1->to = start1 - 1;
2781 
2782             if(seq_int1->to >= original_length1)
2783                 seq_int1->to = original_length1-1;
2784 
2785             seq_int1->id = SeqIdDup(subject_id);
2786             seq_int1->strand = strand1;
2787 
2788             ValNodeAddPointer(&slp1, SEQLOC_INT, seq_int1);
2789 
2790             /* Nucleotide scale shifted by op_type */
2791             seq_int2 = SeqIntNew();
2792 
2793             /* Adjusting last segment and new start point in
2794                nucleotide coordinates */
2795             /* if(seq_int2_last != NULL) {
2796                seq_int2_last->to = seq_int2_last->to - (3 - curr->op_type);
2797                start2 = start2 - (3 - curr->op_type);
2798                } */
2799 
2800             seq_int2->from = get_current_pos(&start2, curr->num*curr->op_type);
2801             seq_int2->to = start2 - 1;
2802 
2803 		/* Chop off three bases and one residue at a time.
2804 			Why does this happen, seems like a bug?
2805 		*/
2806             while (seq_int2->to >= query_length) {/* Possible with frame shifts */
2807                 seq_int2->to -= 3;
2808                 seq_int1->to--;
2809             }
2810 
2811             /* Transfer to DNA minus strand coordinates */
2812             if(strand2 == Seq_strand_minus) {
2813                 int tmp_int;
2814                 tmp_int = seq_int2->to;
2815                 seq_int2->to = original_length2 - seq_int2->from - 1;
2816                 seq_int2->from = original_length2 - tmp_int - 1;
2817             }
2818 
2819             seq_int2->id = SeqIdDup(query_id);
2820             seq_int2->strand = strand2;
2821 
2822             ValNodeAddPointer(&slp2, SEQLOC_INT, seq_int2);
2823 
2824             seq_int1_last = seq_int1; /* Will be used to adjust "to" value */
2825             seq_int2_last = seq_int2; /* Will be used to adjust "to" value */
2826 
2827             break;
2828         case 1:	/* gap of two nucleotides. */
2829         case 2: /* Gap of one nucleotide. */
2830         case 4: /* Insertion of one nucleotide. */
2831         case 5: /* Insertion of two nucleotides. */
2832 
2833             if(first_shift == TRUE) { /* Second frameshift in a row */
2834                 /* Protein coordinates */
2835                 seq_int1 = SeqIntNew();
2836                 seq_int1->from =  get_current_pos(&start1, 1);
2837                 seq_int1->to = start1 - 1;
2838 
2839                 if(seq_int1->to >= original_length1)
2840                     seq_int1->to = original_length1-1;
2841 
2842                 seq_int1->id = SeqIdDup(subject_id);
2843                 seq_int1->strand = strand1;
2844 
2845                 ValNodeAddPointer(&slp1, SEQLOC_INT, seq_int1);
2846 
2847                 /* Nucleotide scale shifted by op_type */
2848                 seq_int2 = SeqIntNew();
2849 
2850                 seq_int2->from = get_current_pos(&start2, curr->op_type);
2851                 seq_int2->to = start2 - 1;
2852 
2853                 if(seq_int2->to >= query_length) {/* Possible with frame shifts */
2854                     seq_int2->to = query_length -1;
2855                     seq_int1->to--;
2856                 }
2857 
2858                 /* Transfer to DNA minus strand coordinates */
2859                 if(strand2 == Seq_strand_minus) {
2860                     int tmp_int;
2861                     tmp_int = seq_int2->to;
2862                     seq_int2->to = original_length2 - seq_int2->from - 1;
2863                     seq_int2->from = original_length2 - tmp_int - 1;
2864                 }
2865 
2866                 seq_int2->id = SeqIdDup(query_id);
2867                 seq_int2->strand = strand2;
2868 
2869                 ValNodeAddPointer(&slp2, SEQLOC_INT, seq_int2);
2870 
2871                 seq_int1_last = seq_int1;
2872                 seq_int2_last = seq_int2;
2873 
2874                 /* first_shift = FALSE; */
2875 
2876                 break;
2877             }
2878 
2879             first_shift = TRUE;
2880 
2881             /* If this substitution is following simple frameshift
2882                we do not need to start new segment, but may continue
2883                old one */
2884             /* printf("curr_num = %d (%d)\n",  curr->num, curr->op_type); */
2885 
2886             if(seq_int2_last != NULL) {
2887                 get_current_pos(&start2, curr->num*(curr->op_type-3));
2888                 if(strand2 != Seq_strand_minus) {
2889                     seq_int2_last->to = start2 - 1;
2890                 } else {
2891                     /* Transfer to DNA minus strand coordinates */
2892                     seq_int2_last->from = original_length2 - start2;
2893                 }
2894 
2895                 /* Adjustment for multiple shifts - theoretically possible,
2896                    but very unprobable */
2897                 if(seq_int2_last->from > seq_int2_last->to) {
2898 
2899                     if(strand2 != Seq_strand_minus) {
2900                         seq_int2_last->to += 3;
2901                     } else {
2902                         seq_int2_last->from -= 3;
2903                     }
2904 
2905                     if(seq_int1_last != 0)
2906                         seq_int1_last++;
2907                 }
2908 
2909             } else if (curr->op_type > 3) {
2910                 /* Protein piece is empty */
2911                 ValNodeAddPointer(&slp1, SEQLOC_EMPTY, SeqIdDup(subject_id));
2912                 /* Simulating insertion of nucleotides */
2913                 seq_int2 = SeqIntNew();
2914                 seq_int2->from = get_current_pos(&start2,
2915                                                  curr->num*(curr->op_type-3));
2916                 seq_int2->to = start2 - 1;
2917 
2918                 if(seq_int2->to >= query_length) {
2919                     seq_int2->to = query_length - 1;
2920                 }
2921 
2922                 /* Transfer to DNA minus strand coordinates */
2923                 if(strand2 == Seq_strand_minus) {
2924                     int tmp_int;
2925                     tmp_int = seq_int2->to;
2926                     seq_int2->to = original_length2 - seq_int2->from - 1;
2927                     seq_int2->from = original_length2 - tmp_int - 1;
2928                 }
2929 
2930                 seq_int2->id = SeqIdDup(query_id);
2931                 seq_int2->strand = strand2;
2932 
2933                 ValNodeAddPointer(&slp2, SEQLOC_INT, seq_int2);
2934 
2935                 seq_int1_last = NULL;
2936                 seq_int2_last = seq_int2; /* Will be used to adjust "to" value */
2937                 break;
2938             } else {
2939                 continue;       /* Main loop */
2940             }
2941             continue;       /* Main loop */
2942             /* break; */
2943         default:
2944             continue;       /* Main loop */
2945             /* break; */
2946         }
2947 
2948         if (reverse) {
2949             slp = slp1;
2950             slp1->next = slp2;
2951             sip = SeqIdDup(subject_id);
2952             sip->next = SeqIdDup(query_id);
2953         } else {
2954             slp = slp2;
2955             slp2->next = slp1;
2956             sip = SeqIdDup(query_id);
2957             sip->next = SeqIdDup(subject_id);
2958         }
2959 
2960         sseg = StdSegNew();
2961         sseg->dim = 2;
2962 
2963         if (sseg_head == NULL)
2964             sseg_head = sseg;
2965 
2966         sseg->loc = slp;
2967         sseg->ids = sip;
2968 
2969         if (sseg_old)
2970             sseg_old->next = sseg;
2971 
2972         sseg_old = sseg;
2973     }
2974     sap->segs = sseg_head;
2975     sap->next = NULL;
2976 
2977     return sap;
2978 }
2979 /*
2980 	SimpleIntervalToGapXEditBlock(Int4 start1, Int4 start2, Int4 length)
2981 
2982 	Int4 start1, start2: offset of this interval in sequence 1 and 2.
2983 	Int4 length: length of the interval.
2984 
2985 	May be used to produce a gapXEditBlock when there are no subs. or deletions
2986 	in the interval (e.g., ungapped BLAST HSP).
2987 */
2988 
2989 GapXEditBlockPtr LIBCALL
SimpleIntervalToGapXEditBlock(Int4 start1,Int4 start2,Int4 length)2990 SimpleIntervalToGapXEditBlock (Int4 start1, Int4 start2, Int4 length)
2991 
2992 {
2993 	GapXEditBlockPtr edit_block;
2994   	GapXEditScriptPtr e_script;
2995 
2996 	edit_block = GapXEditBlockNew(start1, start2);
2997 
2998 	edit_block->esp = e_script = GapXEditScriptNew(NULL);
2999 
3000 	e_script->op_type = GAPALIGN_SUB;
3001 	e_script->num = length;
3002 
3003 	return edit_block;
3004 }
3005 
3006