1 /* ===========================================================================
2 *
3 *                            PUBLIC DOMAIN NOTICE
4 *               National Center for Biotechnology Information
5 *
6 *  This software/database is a "United States Government Work" under the
7 *  terms of the United States Copyright Act.  It was written as part of
8 *  the author's official duties as a United States Government employee and
9 *  thus cannot be copyrighted.  This software/database is freely available
10 *  to the public for use. The National Library of Medicine and the U.S.
11 *  Government have not placed any restriction on its use or reproduction.
12 *
13 *  Although all reasonable efforts have been taken to ensure the accuracy
14 *  and reliability of the software and data, the NLM and the U.S.
15 *  Government do not and cannot warrant the performance or results that
16 *  may be obtained by using this software or data. The NLM and the U.S.
17 *  Government disclaim all warranties, express or implied, including
18 *  warranties of performance, merchantability or fitness for any particular
19 *  purpose.
20 *
21 *  Please cite the author in any work or product based on this material.
22 *
23 * ===========================================================================*/
24 /*****************************************************************************
25 
26 File name: bandalgn.h
27 
28 Author: Gennadiy Savchuk, Jinqhui Zhang, Tom Madden
29 
30 Contents: prototypes to perform a global gapped alignment on two sequences.
31 
32 ****************************************************************************/
33 /* $Revision: 6.7 $
34 * $Log: bandalgn.h,v $
35 * Revision 6.7  2000/07/26 17:26:25  lewisg
36 * fix code for c++ inclusion
37 *
38 * Revision 6.6  2000/02/10 22:47:07  vakatov
39 * DLL'zation for MSVC on PC, Win-NT
40 *
41 * Revision 6.5  1999/03/17 16:49:10  madden
42 * Removed comment within comment
43 *
44 * Revision 6.4  1998/01/16 21:29:45  chappey
45 * Remove function CC_GetExtremes and use now SeqAlignStart, SeqAlignStop in salsap.c
46 *
47  * Revision 6.3  1997/10/22 14:41:39  chappey
48  * added CC_GetAlignExtremes, ChangeGlobalBandMatrix, CC_ExtendSeqAlign
49  *
50  * Revision 6.2  1997/10/02 16:18:20  tatiana
51  * *** empty log message ***
52  *
53  * Revision 6.1  1997/10/02 15:17:15  tatiana
54  * global align utility added
55  *
56  * Revision 6.0  1997/08/25 18:52:15  madden
57  * Revision changed to 6.0
58  *
59  * Revision 1.3  1997/06/23 16:16:13  tatiana
60  * GlobalBandAlign struct changed to use SeqLocs instead of SeqIds
61  *
62  * Revision 1.2  1997/03/05  17:31:21  savchuk
63  * definition of data_t has been moved from bandalgn.c
64  *
65  * Revision 1.1  1997/01/22  14:11:05  madden
66  * Initial revision
67  *
68 */
69 
70 #ifndef _G_BAND_H_
71 #define _G_BAND_H_
72 
73 #include <ncbi.h>
74 #include <gapxdrop.h>
75 #include <blastkar.h>
76 #include <seqport.h>
77 #include <blast.h>
78 
79 
80 #undef NLM_EXTERN
81 #ifdef NLM_IMPORT
82 #define NLM_EXTERN NLM_IMPORT
83 #else
84 #define NLM_EXTERN extern
85 #endif
86 
87 #ifdef __cplusplus
88 extern "C" {
89 #endif
90 
91 
92 #define MININT -999999
93 #define MAXINT 9999999
94 #define SMBAND 400
95 /* Should this be removed. */
96 #define BND_DIGIT ((FloatHi)1.0)
97 
98 #define PSU_MATRIX_SIZE	128
99 
100 #ifdef notdef
101 enum {v,h};
102 #endif
103 
104 typedef struct DP {
105   Int4 CC, DD, CP, DPDP, FF, FP;
106 } PNTR dp_ptr, dp_node;
107 
108 typedef struct {
109   dp_ptr CD;
110   Int4 IP;
111   Int4Ptr MP[4];                /* save crossing points */
112 				/* 0: rep, 1: del, 2: ins */
113   Uint1Ptr MT[4];
114   Int4Ptr FP;                   /* forward dividing points */
115   Uint1Ptr FT;
116   Int4Ptr PNTR w;         /* w = W */
117 
118   Int4 g, zzh, m, rr;          /* g = G, zzh = H, m = g+zzh */
119 
120   Int4 leggA, leggB, reggA, reggB, leghA, leghB, reghA, reghB;
121   Int4Ptr sapp, sapp0;          /* Current script append ptr */
122   Int4  last;                   /* Last script op appended */
123   Int1Ptr PNTR state;
124 } data_t;
125 
126 #define gap(k) ((k) <= 0 ? 0 : (g + zzh * (k))) /* k-symbol indel cost */
127 
128 /* k-symbol indel cost */
129 #define _gap(k) ((k) <= 0 ? 0 : (data->g + data->zzh * (k)))
130 
131 /* k-symbol indel cost */
132 #define gap_(k) ((k) <= 0 ? 0 : (data.g + data.zzh * (k)))
133 
134 /* Append "Delete k" op . Not used*/
135 #define _BND_DEL(k) \
136 last = (last < 0) ? (sapp[-1] -= (k)) : (*sapp++ = -(k));
137 
138 /* Append "Delete k" op */
139 #define _DEL(k) \
140 data->last = (data->last < 0) ? (data->sapp[-1] -= (k)) : (*data->sapp++ = -(k));
141 
142 /* Append "Delete k" op */
143 #define DEL_(k) \
144 data.last = (data.last < 0) ? (data.sapp[-1] -= (k)) : (*data.sapp++ = -(k));
145 
146 /* Append "Insert k" op  Not used*/
147 #define BND_INS(k) \
148 last = (last > 0) ? (sapp[-1] += (k)) : (*sapp++ = (k));
149 
150 /* Append "Insert k" op */
151 #define _INS(k) \
152 data->last = (data->last > 0) ? (data->sapp[-1] += (k)) : (*data->sapp++ = (k));
153 
154 /* Append "Insert k" op */
155 #define INS_(k) \
156 data.last = (data.last > 0) ? (data.sapp[-1] += (k)) : (*data.sapp++ = (k));
157 
158 /* Append "Replace" op */
159 #define REP \
160 {last = *sapp++ = 0;}
161 
162 /* Append "Replace" op */
163 #define _REP \
164 {data->last = *data->sapp++ = 0;}
165 
166 /* Append "Replace" op */
167 #define REP_ \
168 {data.last = *data.sapp++ = 0;}
169 
170 #define REPP \
171 {*sapp++ = MININT; last = 0;}
172 
173 #define _REPP \
174 {*data->sapp++ = MININT; data->last = 0;}
175 
176 #define REPP_ \
177 {*data.sapp++ = MININT; data.last = 0;}
178 
179 /***********************************************************
180 *
181 *	PSUGapOptions are used for the serise banded alignment
182 *	(global and local) with various end gap penalty. This
183 *	option works for DNA-DNA, protein-protein and DNA-protein
184 *	alignment. gshift is the penalty for frame shift, which
185 *	only works for DNA-protein
186 *	matrix was set only for DNA-protein and protein-
187 *	protein alignment
188 *
189 ***********************************************************/
190 typedef struct psu_gapped_options {
191   Int4 gopen;         /* gap open */
192   Int4 gext;          /* gap extend penalties */
193   Int4 gshift;        /* frame-shift penalty, only applies to DNA-protein alignment */
194   /* low and up are used to calculate start_diag and width, which are calculated
195      differently in local and global alignments.
196   */
197   Int4 low, up;
198   Int4 start_diag;    /* start diagonal of band */
199   Int4 width;         /* width for band alignment */
200   Int4 lg1_ext;	      /*the left end gap extension penalty for the first sequence */
201   Int4 rg1_ext;	      /*the right end gap ext. penalty for the first sequence */
202   Int4 lg2_ext;	      /*the left end gap extension penalty for the second sequence */
203   Int4 rg2_ext;	      /*the right end gap ext. penalty for the second sequence */
204   Int4 lg1_open;
205   Int4 lg2_open;
206   Int4 rg1_open;
207   Int4 rg2_open;
208 } PSUGapOptions, PNTR PSUGapOptionsPtr;
209 
210 /*
211 	Functions to create and delte the PSUGapOptions, as well as
212 	set default values.
213 */
214 NLM_EXTERN PSUGapOptionsPtr PSUGapOptionsDelete(PSUGapOptionsPtr options);
215 NLM_EXTERN PSUGapOptionsPtr PSUGapOptionsCreate(Uint1 search_type);
216 
217 
218 
219 /* Search choices for global banded alignments are (global search type): */
220 #define G_BAND_LINEAR		0  /*global banded alignemnt in linear space*/
221 #define G_BAND_QUADRATIC	1  /*global banded alignment in quadratic space*/
222 #define G_BAND_LGAP		2  /*global banded alignment in linear space with
223 				    options for the four end gap penalties*/
224 #define G_BAND_QGAP		3  /*global banded alignment in quadratic space,
225 				    with options for the four end gap penalties */
226 #define G_BAND_L3GAP		4  /*global banded  alignment in linear space,
227 				     with THREE gap penalties and options for
228 				     setting end gap penalties. Not sure if it
229 				     works ?*/
230 #define G_BAND_Q3GAP		5   /*same as 4 except it runs in quadratic space
231 				     it WORKS! */
232 
233 /* Search choices for local banded alignments are (local search type): */
234 #define L_BAND_LINEAR		10  /*local banded alignemnt in linear space*/
235 #define L_BAND_QUADRATIC	11  /*local banded alignment in quadratic space*/
236 #define L_BAND_LGAP			12  /*local banded alignment in linear space with
237 				    options for the four end gap penalties*/
238 #define L_BAND_QGAP			13  /*local banded alignment in quadratic space,
239 				    with options for the four end gap penalties */
240 #define L_BAND_L3GAP		14  /*local banded  alignment in linear space,
241 				     with THREE gap penalties and options for
242 				     setting end gap penalties. Not sure if it
243 				     works ?*/
244 #define L_BAND_Q3GAP		15   /*same as 4 except it runs in quadratic space
245 				     it WORKS! */
246 
247 
248 /*************************************************************************
249 *
250 *	The structure that is passed in during the call to Nlm_GlobalBand
251 *
252 *****************************************************************************/
253 typedef struct global_band_struct {
254 	/* The two sequences to be aligned. */
255 	Uint1Ptr	seq1,
256 			seq2;
257 	Int4		seq1_length,	/* length of sequence 1. */
258 			seq2_length;	/* length of sequence 2. */
259 	/* used to identify sequence in GlobalBandToSeqAlign if filled in. */
260 	SeqLocPtr	seqloc1,		/* SeqLoc for the first sequence. */
261 			seqloc2;		/* SeqLoc for the second. */
262 
263 	Uint1		search_type; /* as in global search_type above */
264 
265   	Int4Ptr PNTR matrix;  /* scoring matrix, provided by and deleted by caller */
266 	PSUGapOptionsPtr	options;	/* parameters for search. */
267 	/* GapXEditBlockPtr filled in by TracebackToGapXEditBlock */
268 	/* A SeqAlign can be made from this. */
269         GapXEditBlockPtr edit_block;
270 	Int4 score;		/* score of the alignment */
271 	Int4 alignment_length;	/* length of the alignment. */
272 } GlobalBandStruct, PNTR GlobalBandStructPtr;
273 
274 /*
275         Deletes the GlobalBandStruct, including the options.
276         Does not delete the sequence matrix, or the ID's.
277 */
278 NLM_EXTERN GlobalBandStructPtr GlobalBandStructDelete(GlobalBandStructPtr gbsp);
279 
280 /*
281 	Creates the GlobalBandStructPtr, needed to run GlobalBandToEditScript,
282 	with the default values.
283 */
284 NLM_EXTERN GlobalBandStructPtr GlobalBandStructCreate(Uint1 search_type);
285 
286 /*
287 	Performs a global alignment, producing a SeqAlign.
288 */
289 NLM_EXTERN SeqAlignPtr GlobalBandToSeqAlign(GlobalBandStructPtr gbsp);
290 
291 /*
292 	Performs a global alignment, producing an EditBlock, which
293 	can be made into a SeqAlign.
294 */
295 NLM_EXTERN Boolean GlobalBandToEditBlock(GlobalBandStructPtr gbsp);
296 
297 
298 /*************************************************************************
299 *
300 *	The structure that is passed in during the call to Nlm_GlobalBand
301 *
302 *****************************************************************************/
303 typedef struct local_band_struct {
304 	/* The two sequences to be aligned. */
305 	Uint1Ptr	seq1, seq2;
306 	Int4		seq1_length,	/* length of sequence 1. */
307 				seq2_length;	/* length of sequence 2. */
308 	/* used to identify sequence in GlobalBandToSeqAlign if filled in. */
309 	SeqIdPtr	seqloc1,		/* SeqLoc for the first sequence. */
310 				seqloc2;		/* SeqLoc for the second. */
311 
312 	Uint1		search_type; /* as in local search_type above */
313 
314   	Int4Ptr PNTR matrix;  /* scoring matrix provided by and deleted by caller */
315 	PSUGapOptionsPtr	options;	/* parameters for search. */
316 	/* GapXEditBlockPtr filled in by TracebackToGapXEditBlock */
317 	/* A SeqAlign can be made from this. */
318         GapXEditBlockPtr edit_block;
319 	Int4		score;		/* score of the alignment */
320 	Int4		seq1_start,	/* start of sequence one's alignment. */
321 				seq2_start,	/* start of sequence two's alignment. */
322 				seq1_end,	/* end of sequence one's alignment. */
323 				seq2_end;	/* end of sequence two's alignment. */
324 } LocalBandStruct, PNTR LocalBandStructPtr;
325 
326 
327 /*
328         Deletes the LocalBandStruct, including the options.
329         Does not delete the sequence matrix, or the ID's.
330 */
331 NLM_EXTERN LocalBandStructPtr LocalBandStructDelete(LocalBandStructPtr gbsp);
332 
333 /*
334 	Creates the LocalBandStructPtr, needed to run LocalBandToEditScript,
335 	with the default values.
336 */
337 NLM_EXTERN LocalBandStructPtr LocalBandStructCreate(Uint1 search_type);
338 
339 /*
340 	Performs a global alignment, producing a SeqAlign.
341 */
342 NLM_EXTERN SeqAlignPtr LocalBandToSeqAlign(LocalBandStructPtr lbsp);
343 
344 /*
345 	Performs a global alignment, producing an EditBlock, which
346 	can be made into a SeqAlign.
347 */
348 NLM_EXTERN Boolean LocalBandToEditBlock(LocalBandStructPtr lbsp);
349 
350 
351 /*********************************************************
352 *
353 *	Int4 gband_linear_gap(A, B, M, N, option, S, Slen)
354 *	compute the global alignment with flexible end gap
355 *	penalty for DNA-DNA and protein protein alignment.
356 *	The alignment is computed with linear space
357 *	This function was originally from the g_band2.c file
358 *
359 *	align the two sequences A, B
360 *	A, B starts with index 1
361 *	M, N is the length of A, B
362 *	option sets the option of the alignment,
363 *	which includes penalties for end gaps
364 *	S is the script that contains the alignment results
365 *	Slen stores the length of the alignment (the size of S)
366 *	return the score of the alignment
367 *
368 ***********************************************************/
369 
370 extern Int4 LIBCALL gband_linear(Uint1Ptr Seq1, Uint1Ptr Seq2,
371 				 Int4 M, Int4 N,
372 				 Int4Ptr PNTR matrix,
373 				 PSUGapOptionsPtr option,
374 				 Int4Ptr S, Int4Ptr Slen);
375 
376 extern Int4 LIBCALL gband_quadratic(Uint1Ptr Seq1, Uint1Ptr Seq2,
377 				    Int4 M, Int4 N,
378 				    Int4Ptr PNTR matrix,
379 				    PSUGapOptionsPtr option,
380 				    Int4Ptr S, Int4Ptr Slen);
381 
382 extern Int4 LIBCALL gband_linear_gap(Uint1Ptr Seq1, Uint1Ptr Seq2,
383 				     Int4 M, Int4 N,
384 				     Int4Ptr PNTR matrix,
385 				     PSUGapOptionsPtr option,
386 				     Int4Ptr S, Int4Ptr Slen);
387 
388 extern Int4 LIBCALL gband_linear_qgap(Uint1Ptr Seq1, Uint1Ptr Seq2,
389 				      Int4 M, Int4 N,
390 				      Int4Ptr PNTR matrix,
391 				      PSUGapOptionsPtr option,
392 				      Int4Ptr S, Int4Ptr Slen);
393 
394 extern Int4 LIBCALL gband_l3gap(Uint1Ptr Seq1, Uint1Ptr Seq2,
395 				Int4 M, Int4 N,
396 				Int4Ptr PNTR matrix,
397 				PSUGapOptionsPtr option,
398 				Int4Ptr S, Int4Ptr Slen);
399 
400 extern Int4 LIBCALL gband_q3gap(Uint1Ptr Seq1, Uint1Ptr Seq2,
401 				Int4 M, Int4 N,
402 				Int4Ptr PNTR matrix,
403 				PSUGapOptionsPtr option,
404 				Int4Ptr S, Int4Ptr Slen);
405 
406 extern Int4 BAND_LOCAL_ALIGN(Uint1Ptr A, Uint1Ptr B,
407 				     Int4 M, Int4 N,
408 				     Int4Ptr PNTR matrix,
409 				     PSUGapOptionsPtr options,
410 				     Int4Ptr S,
411 				     Int4Ptr psi, Int4Ptr psj,
412 				     Int4Ptr pei, Int4Ptr pej,
413 				     Int4 align_type);
414 
415 
416 /**********************************************************************
417 *		Global Alignment utility functions
418 **********************************************************************/
419 
420 NLM_EXTERN void SetGlobaltOptions(GlobalBandStructPtr gbsp, Int4 lg1_ext, Int4 rg1_ext, Int4 lg2_ext, Int4 rg2_ext, Int4 lg1_open, Int4 lg2_open, Int4 rg1_open, Int4 rg2_open, Int2 gopen, Int2 gext);
421 
422 NLM_EXTERN GlobalBandStructPtr CreatBandStruct(SeqLocPtr slp1, SeqLocPtr slp2, Int4Ptr PNTR W, Boolean is_prot, Int2 method);
423 
424 NLM_EXTERN void SetLowUpFromBlast(PSUGapOptionsPtr opt, Boolean is_prot, Int2 type, Int2 width, SeqLocPtr slp1, SeqLocPtr slp2);
425 
426 NLM_EXTERN SeqAlignPtr GlobalBandByLoc(GlobalBandStructPtr gbsp, SeqLocPtr slp1, SeqLocPtr slp2,  Boolean is_prot, Int2 band_method);
427 
428 NLM_EXTERN SeqAlignPtr ExtendSeqAlign(SeqAlignPtr seqalign, Int4 start1, Int4 start2, Int4 stop1, Int4 stop2, Int4 x1, Int4 y1, Int4 x2, Int4 y2);
429 
430 NLM_EXTERN SeqAlignPtr CC_ExtendSeqAlign(SeqAlignPtr sap, Int4 start1, Int4 start2, Int4 stop1, Int4 stop2, Int4 x1, Int4 y1, Int4 x2, Int4 y2, Uint1 strand1, Uint1 strand2);
431 
432 NLM_EXTERN void GetAlignExtremes(SeqAlignPtr seqalign, Int4Ptr xx1, Int4Ptr yy1, Int4Ptr xx2, Int4Ptr yy2);
433 
434 NLM_EXTERN Int2 ChangeGlobalBandMatrix(GlobalBandStructPtr gbsp, Boolean is_prot, CharPtr matrix_name, Int4 penalty, Int4 reward);
435 
436 
437 #ifdef __cplusplus
438 }
439 #endif
440 
441 #undef NLM_EXTERN
442 #ifdef NLM_EXPORT
443 #define NLM_EXTERN NLM_EXPORT
444 #else
445 #define NLM_EXTERN
446 #endif
447 
448 #endif
449