1 /* =========================================================================== 2 * 3 * PUBLIC DOMAIN NOTICE 4 * National Center for Biotechnology Information 5 * 6 * This software/database is a "United States Government Work" under the 7 * terms of the United States Copyright Act. It was written as part of 8 * the author's official duties as a United States Government employee and 9 * thus cannot be copyrighted. This software/database is freely available 10 * to the public for use. The National Library of Medicine and the U.S. 11 * Government have not placed any restriction on its use or reproduction. 12 * 13 * Although all reasonable efforts have been taken to ensure the accuracy 14 * and reliability of the software and data, the NLM and the U.S. 15 * Government do not and cannot warrant the performance or results that 16 * may be obtained by using this software or data. The NLM and the U.S. 17 * Government disclaim all warranties, express or implied, including 18 * warranties of performance, merchantability or fitness for any particular 19 * purpose. 20 * 21 * Please cite the author in any work or product based on this material. 22 * 23 * ===========================================================================*/ 24 /***************************************************************************** 25 26 File name: bandalgn.h 27 28 Author: Gennadiy Savchuk, Jinqhui Zhang, Tom Madden 29 30 Contents: prototypes to perform a global gapped alignment on two sequences. 31 32 ****************************************************************************/ 33 /* $Revision: 6.7 $ 34 * $Log: bandalgn.h,v $ 35 * Revision 6.7 2000/07/26 17:26:25 lewisg 36 * fix code for c++ inclusion 37 * 38 * Revision 6.6 2000/02/10 22:47:07 vakatov 39 * DLL'zation for MSVC on PC, Win-NT 40 * 41 * Revision 6.5 1999/03/17 16:49:10 madden 42 * Removed comment within comment 43 * 44 * Revision 6.4 1998/01/16 21:29:45 chappey 45 * Remove function CC_GetExtremes and use now SeqAlignStart, SeqAlignStop in salsap.c 46 * 47 * Revision 6.3 1997/10/22 14:41:39 chappey 48 * added CC_GetAlignExtremes, ChangeGlobalBandMatrix, CC_ExtendSeqAlign 49 * 50 * Revision 6.2 1997/10/02 16:18:20 tatiana 51 * *** empty log message *** 52 * 53 * Revision 6.1 1997/10/02 15:17:15 tatiana 54 * global align utility added 55 * 56 * Revision 6.0 1997/08/25 18:52:15 madden 57 * Revision changed to 6.0 58 * 59 * Revision 1.3 1997/06/23 16:16:13 tatiana 60 * GlobalBandAlign struct changed to use SeqLocs instead of SeqIds 61 * 62 * Revision 1.2 1997/03/05 17:31:21 savchuk 63 * definition of data_t has been moved from bandalgn.c 64 * 65 * Revision 1.1 1997/01/22 14:11:05 madden 66 * Initial revision 67 * 68 */ 69 70 #ifndef _G_BAND_H_ 71 #define _G_BAND_H_ 72 73 #include <ncbi.h> 74 #include <gapxdrop.h> 75 #include <blastkar.h> 76 #include <seqport.h> 77 #include <blast.h> 78 79 80 #undef NLM_EXTERN 81 #ifdef NLM_IMPORT 82 #define NLM_EXTERN NLM_IMPORT 83 #else 84 #define NLM_EXTERN extern 85 #endif 86 87 #ifdef __cplusplus 88 extern "C" { 89 #endif 90 91 92 #define MININT -999999 93 #define MAXINT 9999999 94 #define SMBAND 400 95 /* Should this be removed. */ 96 #define BND_DIGIT ((FloatHi)1.0) 97 98 #define PSU_MATRIX_SIZE 128 99 100 #ifdef notdef 101 enum {v,h}; 102 #endif 103 104 typedef struct DP { 105 Int4 CC, DD, CP, DPDP, FF, FP; 106 } PNTR dp_ptr, dp_node; 107 108 typedef struct { 109 dp_ptr CD; 110 Int4 IP; 111 Int4Ptr MP[4]; /* save crossing points */ 112 /* 0: rep, 1: del, 2: ins */ 113 Uint1Ptr MT[4]; 114 Int4Ptr FP; /* forward dividing points */ 115 Uint1Ptr FT; 116 Int4Ptr PNTR w; /* w = W */ 117 118 Int4 g, zzh, m, rr; /* g = G, zzh = H, m = g+zzh */ 119 120 Int4 leggA, leggB, reggA, reggB, leghA, leghB, reghA, reghB; 121 Int4Ptr sapp, sapp0; /* Current script append ptr */ 122 Int4 last; /* Last script op appended */ 123 Int1Ptr PNTR state; 124 } data_t; 125 126 #define gap(k) ((k) <= 0 ? 0 : (g + zzh * (k))) /* k-symbol indel cost */ 127 128 /* k-symbol indel cost */ 129 #define _gap(k) ((k) <= 0 ? 0 : (data->g + data->zzh * (k))) 130 131 /* k-symbol indel cost */ 132 #define gap_(k) ((k) <= 0 ? 0 : (data.g + data.zzh * (k))) 133 134 /* Append "Delete k" op . Not used*/ 135 #define _BND_DEL(k) \ 136 last = (last < 0) ? (sapp[-1] -= (k)) : (*sapp++ = -(k)); 137 138 /* Append "Delete k" op */ 139 #define _DEL(k) \ 140 data->last = (data->last < 0) ? (data->sapp[-1] -= (k)) : (*data->sapp++ = -(k)); 141 142 /* Append "Delete k" op */ 143 #define DEL_(k) \ 144 data.last = (data.last < 0) ? (data.sapp[-1] -= (k)) : (*data.sapp++ = -(k)); 145 146 /* Append "Insert k" op Not used*/ 147 #define BND_INS(k) \ 148 last = (last > 0) ? (sapp[-1] += (k)) : (*sapp++ = (k)); 149 150 /* Append "Insert k" op */ 151 #define _INS(k) \ 152 data->last = (data->last > 0) ? (data->sapp[-1] += (k)) : (*data->sapp++ = (k)); 153 154 /* Append "Insert k" op */ 155 #define INS_(k) \ 156 data.last = (data.last > 0) ? (data.sapp[-1] += (k)) : (*data.sapp++ = (k)); 157 158 /* Append "Replace" op */ 159 #define REP \ 160 {last = *sapp++ = 0;} 161 162 /* Append "Replace" op */ 163 #define _REP \ 164 {data->last = *data->sapp++ = 0;} 165 166 /* Append "Replace" op */ 167 #define REP_ \ 168 {data.last = *data.sapp++ = 0;} 169 170 #define REPP \ 171 {*sapp++ = MININT; last = 0;} 172 173 #define _REPP \ 174 {*data->sapp++ = MININT; data->last = 0;} 175 176 #define REPP_ \ 177 {*data.sapp++ = MININT; data.last = 0;} 178 179 /*********************************************************** 180 * 181 * PSUGapOptions are used for the serise banded alignment 182 * (global and local) with various end gap penalty. This 183 * option works for DNA-DNA, protein-protein and DNA-protein 184 * alignment. gshift is the penalty for frame shift, which 185 * only works for DNA-protein 186 * matrix was set only for DNA-protein and protein- 187 * protein alignment 188 * 189 ***********************************************************/ 190 typedef struct psu_gapped_options { 191 Int4 gopen; /* gap open */ 192 Int4 gext; /* gap extend penalties */ 193 Int4 gshift; /* frame-shift penalty, only applies to DNA-protein alignment */ 194 /* low and up are used to calculate start_diag and width, which are calculated 195 differently in local and global alignments. 196 */ 197 Int4 low, up; 198 Int4 start_diag; /* start diagonal of band */ 199 Int4 width; /* width for band alignment */ 200 Int4 lg1_ext; /*the left end gap extension penalty for the first sequence */ 201 Int4 rg1_ext; /*the right end gap ext. penalty for the first sequence */ 202 Int4 lg2_ext; /*the left end gap extension penalty for the second sequence */ 203 Int4 rg2_ext; /*the right end gap ext. penalty for the second sequence */ 204 Int4 lg1_open; 205 Int4 lg2_open; 206 Int4 rg1_open; 207 Int4 rg2_open; 208 } PSUGapOptions, PNTR PSUGapOptionsPtr; 209 210 /* 211 Functions to create and delte the PSUGapOptions, as well as 212 set default values. 213 */ 214 NLM_EXTERN PSUGapOptionsPtr PSUGapOptionsDelete(PSUGapOptionsPtr options); 215 NLM_EXTERN PSUGapOptionsPtr PSUGapOptionsCreate(Uint1 search_type); 216 217 218 219 /* Search choices for global banded alignments are (global search type): */ 220 #define G_BAND_LINEAR 0 /*global banded alignemnt in linear space*/ 221 #define G_BAND_QUADRATIC 1 /*global banded alignment in quadratic space*/ 222 #define G_BAND_LGAP 2 /*global banded alignment in linear space with 223 options for the four end gap penalties*/ 224 #define G_BAND_QGAP 3 /*global banded alignment in quadratic space, 225 with options for the four end gap penalties */ 226 #define G_BAND_L3GAP 4 /*global banded alignment in linear space, 227 with THREE gap penalties and options for 228 setting end gap penalties. Not sure if it 229 works ?*/ 230 #define G_BAND_Q3GAP 5 /*same as 4 except it runs in quadratic space 231 it WORKS! */ 232 233 /* Search choices for local banded alignments are (local search type): */ 234 #define L_BAND_LINEAR 10 /*local banded alignemnt in linear space*/ 235 #define L_BAND_QUADRATIC 11 /*local banded alignment in quadratic space*/ 236 #define L_BAND_LGAP 12 /*local banded alignment in linear space with 237 options for the four end gap penalties*/ 238 #define L_BAND_QGAP 13 /*local banded alignment in quadratic space, 239 with options for the four end gap penalties */ 240 #define L_BAND_L3GAP 14 /*local banded alignment in linear space, 241 with THREE gap penalties and options for 242 setting end gap penalties. Not sure if it 243 works ?*/ 244 #define L_BAND_Q3GAP 15 /*same as 4 except it runs in quadratic space 245 it WORKS! */ 246 247 248 /************************************************************************* 249 * 250 * The structure that is passed in during the call to Nlm_GlobalBand 251 * 252 *****************************************************************************/ 253 typedef struct global_band_struct { 254 /* The two sequences to be aligned. */ 255 Uint1Ptr seq1, 256 seq2; 257 Int4 seq1_length, /* length of sequence 1. */ 258 seq2_length; /* length of sequence 2. */ 259 /* used to identify sequence in GlobalBandToSeqAlign if filled in. */ 260 SeqLocPtr seqloc1, /* SeqLoc for the first sequence. */ 261 seqloc2; /* SeqLoc for the second. */ 262 263 Uint1 search_type; /* as in global search_type above */ 264 265 Int4Ptr PNTR matrix; /* scoring matrix, provided by and deleted by caller */ 266 PSUGapOptionsPtr options; /* parameters for search. */ 267 /* GapXEditBlockPtr filled in by TracebackToGapXEditBlock */ 268 /* A SeqAlign can be made from this. */ 269 GapXEditBlockPtr edit_block; 270 Int4 score; /* score of the alignment */ 271 Int4 alignment_length; /* length of the alignment. */ 272 } GlobalBandStruct, PNTR GlobalBandStructPtr; 273 274 /* 275 Deletes the GlobalBandStruct, including the options. 276 Does not delete the sequence matrix, or the ID's. 277 */ 278 NLM_EXTERN GlobalBandStructPtr GlobalBandStructDelete(GlobalBandStructPtr gbsp); 279 280 /* 281 Creates the GlobalBandStructPtr, needed to run GlobalBandToEditScript, 282 with the default values. 283 */ 284 NLM_EXTERN GlobalBandStructPtr GlobalBandStructCreate(Uint1 search_type); 285 286 /* 287 Performs a global alignment, producing a SeqAlign. 288 */ 289 NLM_EXTERN SeqAlignPtr GlobalBandToSeqAlign(GlobalBandStructPtr gbsp); 290 291 /* 292 Performs a global alignment, producing an EditBlock, which 293 can be made into a SeqAlign. 294 */ 295 NLM_EXTERN Boolean GlobalBandToEditBlock(GlobalBandStructPtr gbsp); 296 297 298 /************************************************************************* 299 * 300 * The structure that is passed in during the call to Nlm_GlobalBand 301 * 302 *****************************************************************************/ 303 typedef struct local_band_struct { 304 /* The two sequences to be aligned. */ 305 Uint1Ptr seq1, seq2; 306 Int4 seq1_length, /* length of sequence 1. */ 307 seq2_length; /* length of sequence 2. */ 308 /* used to identify sequence in GlobalBandToSeqAlign if filled in. */ 309 SeqIdPtr seqloc1, /* SeqLoc for the first sequence. */ 310 seqloc2; /* SeqLoc for the second. */ 311 312 Uint1 search_type; /* as in local search_type above */ 313 314 Int4Ptr PNTR matrix; /* scoring matrix provided by and deleted by caller */ 315 PSUGapOptionsPtr options; /* parameters for search. */ 316 /* GapXEditBlockPtr filled in by TracebackToGapXEditBlock */ 317 /* A SeqAlign can be made from this. */ 318 GapXEditBlockPtr edit_block; 319 Int4 score; /* score of the alignment */ 320 Int4 seq1_start, /* start of sequence one's alignment. */ 321 seq2_start, /* start of sequence two's alignment. */ 322 seq1_end, /* end of sequence one's alignment. */ 323 seq2_end; /* end of sequence two's alignment. */ 324 } LocalBandStruct, PNTR LocalBandStructPtr; 325 326 327 /* 328 Deletes the LocalBandStruct, including the options. 329 Does not delete the sequence matrix, or the ID's. 330 */ 331 NLM_EXTERN LocalBandStructPtr LocalBandStructDelete(LocalBandStructPtr gbsp); 332 333 /* 334 Creates the LocalBandStructPtr, needed to run LocalBandToEditScript, 335 with the default values. 336 */ 337 NLM_EXTERN LocalBandStructPtr LocalBandStructCreate(Uint1 search_type); 338 339 /* 340 Performs a global alignment, producing a SeqAlign. 341 */ 342 NLM_EXTERN SeqAlignPtr LocalBandToSeqAlign(LocalBandStructPtr lbsp); 343 344 /* 345 Performs a global alignment, producing an EditBlock, which 346 can be made into a SeqAlign. 347 */ 348 NLM_EXTERN Boolean LocalBandToEditBlock(LocalBandStructPtr lbsp); 349 350 351 /********************************************************* 352 * 353 * Int4 gband_linear_gap(A, B, M, N, option, S, Slen) 354 * compute the global alignment with flexible end gap 355 * penalty for DNA-DNA and protein protein alignment. 356 * The alignment is computed with linear space 357 * This function was originally from the g_band2.c file 358 * 359 * align the two sequences A, B 360 * A, B starts with index 1 361 * M, N is the length of A, B 362 * option sets the option of the alignment, 363 * which includes penalties for end gaps 364 * S is the script that contains the alignment results 365 * Slen stores the length of the alignment (the size of S) 366 * return the score of the alignment 367 * 368 ***********************************************************/ 369 370 extern Int4 LIBCALL gband_linear(Uint1Ptr Seq1, Uint1Ptr Seq2, 371 Int4 M, Int4 N, 372 Int4Ptr PNTR matrix, 373 PSUGapOptionsPtr option, 374 Int4Ptr S, Int4Ptr Slen); 375 376 extern Int4 LIBCALL gband_quadratic(Uint1Ptr Seq1, Uint1Ptr Seq2, 377 Int4 M, Int4 N, 378 Int4Ptr PNTR matrix, 379 PSUGapOptionsPtr option, 380 Int4Ptr S, Int4Ptr Slen); 381 382 extern Int4 LIBCALL gband_linear_gap(Uint1Ptr Seq1, Uint1Ptr Seq2, 383 Int4 M, Int4 N, 384 Int4Ptr PNTR matrix, 385 PSUGapOptionsPtr option, 386 Int4Ptr S, Int4Ptr Slen); 387 388 extern Int4 LIBCALL gband_linear_qgap(Uint1Ptr Seq1, Uint1Ptr Seq2, 389 Int4 M, Int4 N, 390 Int4Ptr PNTR matrix, 391 PSUGapOptionsPtr option, 392 Int4Ptr S, Int4Ptr Slen); 393 394 extern Int4 LIBCALL gband_l3gap(Uint1Ptr Seq1, Uint1Ptr Seq2, 395 Int4 M, Int4 N, 396 Int4Ptr PNTR matrix, 397 PSUGapOptionsPtr option, 398 Int4Ptr S, Int4Ptr Slen); 399 400 extern Int4 LIBCALL gband_q3gap(Uint1Ptr Seq1, Uint1Ptr Seq2, 401 Int4 M, Int4 N, 402 Int4Ptr PNTR matrix, 403 PSUGapOptionsPtr option, 404 Int4Ptr S, Int4Ptr Slen); 405 406 extern Int4 BAND_LOCAL_ALIGN(Uint1Ptr A, Uint1Ptr B, 407 Int4 M, Int4 N, 408 Int4Ptr PNTR matrix, 409 PSUGapOptionsPtr options, 410 Int4Ptr S, 411 Int4Ptr psi, Int4Ptr psj, 412 Int4Ptr pei, Int4Ptr pej, 413 Int4 align_type); 414 415 416 /********************************************************************** 417 * Global Alignment utility functions 418 **********************************************************************/ 419 420 NLM_EXTERN void SetGlobaltOptions(GlobalBandStructPtr gbsp, Int4 lg1_ext, Int4 rg1_ext, Int4 lg2_ext, Int4 rg2_ext, Int4 lg1_open, Int4 lg2_open, Int4 rg1_open, Int4 rg2_open, Int2 gopen, Int2 gext); 421 422 NLM_EXTERN GlobalBandStructPtr CreatBandStruct(SeqLocPtr slp1, SeqLocPtr slp2, Int4Ptr PNTR W, Boolean is_prot, Int2 method); 423 424 NLM_EXTERN void SetLowUpFromBlast(PSUGapOptionsPtr opt, Boolean is_prot, Int2 type, Int2 width, SeqLocPtr slp1, SeqLocPtr slp2); 425 426 NLM_EXTERN SeqAlignPtr GlobalBandByLoc(GlobalBandStructPtr gbsp, SeqLocPtr slp1, SeqLocPtr slp2, Boolean is_prot, Int2 band_method); 427 428 NLM_EXTERN SeqAlignPtr ExtendSeqAlign(SeqAlignPtr seqalign, Int4 start1, Int4 start2, Int4 stop1, Int4 stop2, Int4 x1, Int4 y1, Int4 x2, Int4 y2); 429 430 NLM_EXTERN SeqAlignPtr CC_ExtendSeqAlign(SeqAlignPtr sap, Int4 start1, Int4 start2, Int4 stop1, Int4 stop2, Int4 x1, Int4 y1, Int4 x2, Int4 y2, Uint1 strand1, Uint1 strand2); 431 432 NLM_EXTERN void GetAlignExtremes(SeqAlignPtr seqalign, Int4Ptr xx1, Int4Ptr yy1, Int4Ptr xx2, Int4Ptr yy2); 433 434 NLM_EXTERN Int2 ChangeGlobalBandMatrix(GlobalBandStructPtr gbsp, Boolean is_prot, CharPtr matrix_name, Int4 penalty, Int4 reward); 435 436 437 #ifdef __cplusplus 438 } 439 #endif 440 441 #undef NLM_EXTERN 442 #ifdef NLM_EXPORT 443 #define NLM_EXTERN NLM_EXPORT 444 #else 445 #define NLM_EXTERN 446 #endif 447 448 #endif 449