1 /* =========================================================================== 2 * 3 * PUBLIC DOMAIN NOTICE 4 * National Center for Biotechnology Information 5 * 6 * This software/database is a "United States Government Work" under the 7 * terms of the United States Copyright Act. It was written as part of 8 * the author's official duties as a United States Government employee and 9 * thus cannot be copyrighted. This software/database is freely available 10 * to the public for use. The National Library of Medicine and the U.S. 11 * Government have not placed any restriction on its use or reproduction. 12 * 13 * Although all reasonable efforts have been taken to ensure the accuracy 14 * and reliability of the software and data, the NLM and the U.S. 15 * Government do not and cannot warrant the performance or results that 16 * may be obtained by using this software or data. The NLM and the U.S. 17 * Government disclaim all warranties, express or implied, including 18 * warranties of performance, merchantability or fitness for any particular 19 * purpose. 20 * 21 * Please cite the author in any work or product based on this material. 22 * 23 * ===========================================================================*/ 24 /***************************************************************************** 25 26 File name: blastdef.h 27 28 Author: Tom Madden 29 30 Contents: #defines and definitions for structures used by BLAST. 31 32 ******************************************************************************/ 33 /* $Revision: 6.169 $ 34 * $Log: blastdef.h,v $ 35 * Revision 6.169 2007/03/13 20:39:58 madden 36 * - Change the type of the dropoff_1st_pass, dropoff_2nd_pass, 37 * gap_x_dropoff, and gap_x_dropoff_final fields of the 38 * BLAST_OptionsBlk struct to Nlm_FloatHi. 39 * [from Mike Gertz] 40 * 41 * Revision 6.168 2006/05/03 14:41:50 madden 42 * Added a Boolean field "unified_p" to the BLAST_OptionsBlk 43 * structure. 
The field indicates whether to use a combination of 44 * alignment and compositional p-values when evaluating significance; 45 * the field is ignored unless composition-based statistics is on. 46 * (from Mike Gertz). 47 * 48 * Revision 6.167 2006/03/21 22:35:27 camacho 49 * Add support for setting database length in BLAST_WizardOptions{Blk,Mask} 50 * 51 * Revision 6.166 2006/01/24 18:38:15 papadopo 52 * from Mike Gertz: Remove #define'd constants that specify the composition adjustment mode. These have been replaced by an enum in the composition adjustment library 53 * 54 * Revision 6.165 2005/08/31 20:32:47 coulouri 55 * From Mike Gertz: 56 * - Added the kappa_expect_value field to the 57 * BLAST_OptionsBlk datatype. This new field holds the cutoff value 58 * used by RedoAlignmentCore; it should equal expect_value if 59 * RedoAlignmentCore will not be called. 60 * - removed the now unused original_expect_value field of the 61 * BLAST_OptionsBlk datatype. 62 * 63 * Revision 6.164 2005/07/27 15:51:54 coulouri 64 * remove unused queue_callback 65 * 66 * Revision 6.163 2005/05/16 17:43:29 papadopo 67 * From Alejandro Schaffer: Added support for compositional score 68 * matrix adjustment 69 * 70 * Revision 6.162 2005/04/25 14:16:36 coulouri 71 * set db_chunk_size adaptively 72 * 73 * Revision 6.161 2005/01/10 18:52:29 coulouri 74 * fixes from morgulis to allow concatenation of >255 queries in [t]blastn 75 * 76 * Revision 6.160 2004/11/19 13:22:05 madden 77 * Remove no_check_score completely (from Mike Gertz) 78 * 79 * Revision 6.159 2004/09/28 16:02:29 papadopo 80 * From Michael Gertz: Changed the "sumscore" field of an HSP to 81 * "xsum" to represent a normalized sum score of linked HSPs; 82 * the normalized score is more appropriate/useful in doing linking. 
83 * 84 * Revision 6.158 2004/06/30 12:29:00 madden 85 * Removed typedef for BlastPruneSapStruct and some defines, moved to blfmtutl.h 86 * 87 * Revision 6.157 2004/04/30 12:45:45 coulouri 88 * bump version to 2.2.9 89 * 90 * Revision 6.156 2004/04/13 21:02:52 madden 91 * Add ignore_gilist Boolean to Options for use in formatting 92 * 93 * Revision 6.155 2004/02/04 15:35:03 camacho 94 * Rollback to fix problems in release 2.2.7 95 * 96 * Revision 6.154 2004/01/27 20:46:06 dondosha 97 * Allow values 0, 1, 2 for no_traceback megablast option 98 * 99 * Revision 6.153 2004/01/05 22:09:26 madden 100 * Put back dashes in date 101 * 102 * Revision 6.152 2004/01/02 13:44:32 coulouri 103 * Revert to hardcoded BLAST_RELEASE_DATE 104 * 105 * Revision 6.151 2003/12/29 15:51:18 coulouri 106 * Bump version, use __DATE__ instead of hardcoded date 107 * 108 * Revision 6.150 2003/11/06 19:52:13 dondosha 109 * Added error MBTemplateType, so it can be returned when wordsize/template length combination is wrong 110 * 111 * Revision 6.149 2003/10/02 19:30:11 madden 112 * add field seAlign to SWResults for use in kappa.c 113 * 114 * Revision 6.148 2003/04/09 14:18:34 madden 115 * Update version and release-date 116 * 117 * Revision 6.147 2003/03/26 15:45:48 boemker 118 * Documented relationships among BLAST_OptionsBlk, BLAST_WizardOptionsBlk, 119 * and BLAST_WizardOptionsMask. 120 * 121 * Revision 6.146 2003/03/25 22:23:06 boemker 122 * Replaced cutoff_s2, which isn't used, with cutoff_s, which is. 123 * Added query_lcase_mask. 124 * 125 * Revision 6.145 2003/03/25 19:58:18 boemker 126 * Moved code to initialize search options from blastcgicmd.cpp to here, as 127 * BLAST_Wizard et al. 
128 * 129 * Revision 6.144 2003/03/24 19:42:14 madden 130 * Changes to support query concatenation for blastn and tblastn 131 * 132 * Revision 6.143 2002/11/22 23:28:43 dondosha 133 * Use array of structures instead of array of pointers for initial offset pairs 134 * 135 * Revision 6.142 2002/11/16 17:12:55 madden 136 * Change version and date 137 * 138 * Revision 6.141 2002/11/04 22:51:13 dondosha 139 * Changed FloatHi pvalue to Int4 num_ident in HSP structures 140 * 141 * Revision 6.140 2002/09/13 19:11:02 camacho 142 * Added rps_qlen field 143 * 144 * Revision 6.139 2002/09/11 21:15:23 camacho 145 * Removed obsolete #define and comment about BlastSeqIdList structure 146 * 147 * Revision 6.138 2002/09/11 20:46:25 camacho 148 * Removed deprecated BlastSeqIdListPtr code 149 * 150 * Revision 6.137 2002/08/26 15:49:51 madden 151 * Change release date and version 152 * 153 * Revision 6.136 2002/08/09 19:39:20 camacho 154 * Added constants for some blast search parameters 155 * 156 * Revision 6.135 2002/06/21 21:43:01 camacho 157 * Removed obsolete BlastSeqIdList structure and functions 158 * 159 * Revision 6.134 2002/05/17 21:40:13 dondosha 160 * Added 2 optimal Mega BLAST word templates for length 21 161 * 162 * Revision 6.133 2002/05/14 22:20:20 dondosha 163 * Renamed maximal discontiguous template type into optimal 164 * 165 * Revision 6.132 2002/04/23 20:59:53 madden 166 * Change version and date for release 167 * 168 * Revision 6.131 2002/04/09 18:16:43 dondosha 169 * Added more options/parameters for megablast 170 * 171 * Revision 6.130 2002/03/28 18:53:18 madden 172 * Add ValNodePtr mask1 to BlastSearch structure 173 * 174 * Revision 6.129 2001/12/28 20:38:39 dondosha 175 * Moved Mega BLAST related parameters into a separate structure 176 * 177 * Revision 6.128 2001/12/28 18:01:26 dondosha 178 * Added field scoreThisAlign to SWResults to allow more tie-breaking options 179 * 180 * Revision 6.127 2001/12/14 22:05:40 madden 181 * Changed version and release date 
182 * 183 * Revision 6.126 2001/09/11 14:28:31 madden 184 * Added timed_out Boolean to SearchBlk 185 * 186 * Revision 6.125 2001/09/07 14:46:44 dondosha 187 * Roll back removal of threshold_first from functions and structures 188 * 189 * Revision 6.124 2001/09/06 20:24:34 dondosha 190 * Removed threshold_first 191 * 192 * Revision 6.123 2001/08/06 12:50:51 madden 193 * Change release date 194 * 195 * Revision 6.122 2001/07/12 19:50:24 madden 196 * Changed release date 197 * 198 * Revision 6.121 2001/06/28 13:42:09 madden 199 * Fixes to prevent overflow on number of hits reporting 200 * 201 * Revision 6.120 2001/06/12 19:48:56 madden 202 * Introduce total_hsp_limit, check before making SeqAlign 203 * 204 * Revision 6.119 2001/04/13 20:56:08 madden 205 * Updated version to 2.2.1, changed date 206 * 207 * Revision 6.118 2001/04/11 20:56:21 madden 208 * Added scalingFactor for rpsblast, changed release date 209 * 210 * Revision 6.117 2001/03/30 21:58:18 madden 211 * Change release date and version 212 * 213 * Revision 6.116 2001/03/27 21:27:01 madden 214 * Minor efficiency in how lookup table is made 215 * 216 * Revision 6.115 2001/03/19 18:52:57 madden 217 * Add base_offset element to structure for BlastHitRange 218 * 219 * Revision 6.114 2001/02/07 21:05:33 dondosha 220 * Added an output stream to BlastOptionsBlk 221 * 222 * Revision 6.113 2000/12/21 22:28:17 dondosha 223 * Added option and parameter for percent identity cutoff 224 * 225 * Revision 6.112 2000/11/29 16:17:56 dondosha 226 * Added a definition of small structure BLASTHSPSegment 227 * 228 * Revision 6.111 2000/11/14 18:14:00 madden 229 * release date to Nov-13-2000 230 * 231 * Revision 6.110 2000/11/08 22:18:05 dondosha 232 * Added longest_intron integer option and parameter 233 * 234 * Revision 6.109 2000/11/07 16:30:25 madden 235 * Introduce intermediate score (before linking of HSPs) for blastx and tblastn 236 * 237 * Revision 6.108 2000/11/03 20:16:24 dondosha 238 * Changed one_line_results option 
and parameter to more meaningful no_traceback 239 * 240 * Revision 6.107 2000/11/01 16:25:56 madden 241 * Changes from Futamura for psitblastn 242 * 243 * Revision 6.106 2000/10/18 19:53:19 shavirin 244 * Empty log message. 245 * 246 * Revision 6.105 2000/10/18 19:17:56 shavirin 247 * Changed BLAST_ENGINE_VERSION and BLAST_RELEASE_DATE 248 * 249 * Revision 6.104 2000/10/05 19:50:49 dondosha 250 * Added mb_result_struct to the BlastSearchBlk to be used instead of result_struct in Mega BLAST 251 * 252 * Revision 6.103 2000/09/28 14:48:20 dondosha 253 * Added exact_match_array to hitlist structure for megablast initial hits 254 * 255 * Revision 6.102 2000/09/21 19:16:30 madden 256 * increase AWAKE_THR_MIN_SIZE by 100 257 * 258 * Revision 6.101 2000/08/29 19:35:49 madden 259 * Add gilist_not_owned to blast_gi_list 260 * 261 * Revision 6.100 2000/08/08 20:37:21 madden 262 * increase version number to 2.1.1 and release date 263 * 264 * Revision 6.99 2000/07/17 14:05:22 shavirin 265 * Added parameter Out-Of-Frame shift penalty and query DNAP sequence 266 * 267 * Revision 6.98 2000/07/11 18:38:02 madden 268 * decreased size of helper array, added prefetch to BlastGappedScoreInternal 269 * 270 * Revision 6.97 2000/07/11 17:16:20 shavirin 271 * Added new parameter is_ooframe for Out-Of-Frame gapping algorithm. 272 * 273 * Revision 6.96 2000/07/10 15:41:28 madden 274 * Add typedef for BLAST_HSP_helper 275 * 276 * Revision 6.95 2000/07/07 21:20:07 vakatov 277 * Get all "#include" out of the 'extern "C" { }' scope! 
278 * 279 * Revision 6.94 2000/07/06 17:24:55 dondosha 280 * Added option and parameter megablast_full_deflines 281 * 282 * Revision 6.93 2000/06/30 17:52:45 madden 283 * Move AWAKE_THR_MIN_SIZE to blastdef.h 284 * 285 * Revision 6.92 2000/06/29 20:30:03 madden 286 * Update version and date 287 * 288 * Revision 6.91 2000/06/08 20:34:18 madden 289 * add explode_seqids option to show all ids in a defline 290 * 291 * Revision 6.90 2000/05/26 20:04:57 madden 292 * Raise version and date 293 * 294 * Revision 6.89 2000/05/12 19:40:59 dondosha 295 * Added qid_array element to BlastSearchBlk 296 * 297 * Revision 6.88 2000/05/01 19:04:31 shavirin 298 * Changed parameter level in BlastErrorMsg structure from Uint1 to Uint2. 299 * 300 * Revision 6.87 2000/04/21 20:48:05 madden 301 * Change version and date 302 * 303 * Revision 6.86 2000/04/06 14:47:10 madden 304 * Added original_expect_value 305 * 306 * Revision 6.85 2000/04/03 21:20:03 dondosha 307 * Added option and parameter is_neighboring 308 * 309 * Revision 6.84 2000/03/31 19:10:44 dondosha 310 * Changed some names related to MegaBlast 311 * 312 * Revision 6.83 2000/03/13 21:01:24 dondosha 313 * Added boolean option sort_gi_list to options block structure 314 * 315 * Revision 6.82 2000/02/29 18:17:23 shavirin 316 * Variable query_dna_mask changed to query_lcase_mask. 317 * 318 * Revision 6.81 2000/02/18 15:30:36 shavirin 319 * Added parameter query_dna_mask into options and parameters. 320 * 321 * Revision 6.80 2000/02/17 21:23:09 shavirin 322 * Added parameter is_rps_blast. 323 * 324 * Revision 6.79 2000/02/17 19:00:44 shavirin 325 * Removed theCacheSize parameter from everywhere. 326 * 327 * Revision 6.78 2000/02/15 19:06:09 shavirin 328 * Added parameter filter_string into BLAST_ParameterBlk structure. 
329 * 330 * Revision 6.77 2000/02/02 18:21:51 madden 331 * Add LinkHelpStruct definition 332 * 333 * Revision 6.76 2000/02/02 16:52:43 dondosha 334 * Added option one_line_results to BLAST_OptionsBlk and BLAST_ParameterBlk 335 * 336 * Revision 6.75 2000/02/01 18:02:22 dondosha 337 * Added greedy alignment option to BLAST_OptionsBlk and query context offsets array to BlastSearchBlk 338 * 339 * Revision 6.74 2000/01/26 22:00:52 madden 340 * Added subject_index field to SWResults 341 * 342 * Revision 6.73 2000/01/20 19:12:00 madden 343 * Change BLAST version and date 344 * 345 * Revision 6.72 2000/01/13 18:10:43 madden 346 * Fix problem with incorrect stat values for blastn and missing hits 347 * 348 * Revision 6.71 2000/01/11 17:02:48 shavirin 349 * Added element theCacheSize into BLAST_OptionsBlk and BLAST_ParameterBlk. 350 * 351 * Revision 6.70 1999/12/31 14:23:19 egorov 352 * Add support for using mixture of real and maks database with gi-list files: 353 * 1. Change logic of creating rdfp list. 354 * 2. BlastGetDbChunk gets real databases first, then masks. 355 * 3. Propoper calculation of database sizes using alias files. 356 * 4. Change to CommonIndex to support using of mask databases. 357 * 5. Use correct gis in formated output (BlastGetAllowedGis()). 358 * 6. Other small changes 359 * 360 * Revision 6.69 1999/12/21 20:04:15 egorov 361 * gi_list now contains start position for corresponding database 362 * 363 * Revision 6.68 1999/11/30 18:23:08 shavirin 364 * Added parameter max_num_patterns to the BLAST_OptionsBlkPtr structure 365 * 366 * Revision 6.67 1999/11/15 22:03:31 madden 367 * added Boolean isFirstAlignment to SWResults 368 * 369 * Revision 6.66 1999/11/12 20:57:39 shavirin 370 * Added parameter use_best_align into BLAST_ParameterBlkPtr 371 * 372 * Revision 6.65 1999/11/12 16:37:30 shavirin 373 * Added new option use_best_align into Blast options. 
374 * 375 * Revision 6.64 1999/10/26 20:45:19 madden 376 * Add use_real_db_size option 377 * 378 * Revision 6.63 1999/10/05 17:42:54 shavirin 379 * Removed global variables from blast.c 380 * 381 * Revision 6.62 1999/09/28 20:14:31 madden 382 * Joerg changes to mimize cache misses 383 * 384 * Revision 6.61 1999/08/31 13:42:23 madden 385 * Moved SWResults to blastdef.h from profiles.h 386 * 387 * Revision 6.60 1999/08/27 18:07:33 shavirin 388 * Passed parameter decline_align from top to the engine. 389 * 390 * Revision 6.59 1999/08/26 14:56:49 madden 391 * Raise version and date 392 * 393 * Revision 6.58 1999/08/26 14:55:16 madden 394 * Fixed Int8 problem 395 * 396 * Revision 6.57 1999/08/20 19:47:41 madden 397 * removed version element 398 * 399 * Revision 6.56 1999/08/17 18:37:12 shavirin 400 * Added phi_pattern element into options block. 401 * 402 * Revision 6.55 1999/08/17 14:02:34 madden 403 * add smith_waterman and tweak_parameters fields to Options 404 * 405 * Revision 6.54 1999/05/10 18:47:52 madden 406 * Changed version to 2.0.9 407 * 408 * Revision 6.53 1999/05/08 15:04:24 madden 409 * Changed version and release date 410 * 411 * Revision 6.52 1999/04/23 19:25:01 madden 412 * Fixes a prototype complaint 413 * 414 * Revision 6.51 1999/04/23 16:45:54 madden 415 * call BQ_IncSemaphore as callback 416 * 417 * Revision 6.50 1999/04/22 16:46:13 shavirin 418 * Added semaphore ID to the search_blk structure. 419 * 420 * Revision 6.49 1999/04/01 21:42:47 madden 421 * Fix memory leaks when gi list is used 422 * 423 * Revision 6.48 1999/03/18 21:13:32 egorov 424 * The "output" filed added to search block. This is VoidPtr and an application can 425 * use it as stream, ASNIO, etc to output blast results. 
426 * 427 * Revision 6.47 1999/03/17 16:49:11 madden 428 * Removed comment within comment 429 * 430 * Revision 6.46 1999/02/17 13:23:01 madden 431 * Added hsp_num_max 432 * 433 * Revision 6.45 1999/01/28 16:04:56 madden 434 * do_not_reallocate Boolean for HSPs 435 * 436 * Revision 6.44 1999/01/26 17:56:37 madden 437 * query_id added to HitRange 438 * 439 * Revision 6.43 1999/01/05 13:57:19 madden 440 * Changed version and release date 441 * 442 * Revision 6.42 1998/12/31 18:17:03 madden 443 * Added strand option 444 * 445 * Revision 6.41 1998/12/29 17:45:06 madden 446 * Add do_sum_stats flag 447 * 448 * Revision 6.40 1998/12/21 13:09:53 madden 449 * Changed version and release date 450 * 451 * Revision 6.39 1998/11/04 01:36:05 egorov 452 * Add support for entrez-query and org-name to blast3 453 * 454 * Revision 6.38 1998/09/16 18:58:57 madden 455 * Changed release number and date 456 * 457 * Revision 6.37 1998/09/14 15:11:15 egorov 458 * Add support for Int8 length databases; remove unused variables 459 * 460 * Revision 6.36 1998/07/30 19:00:32 madden 461 * Change to allow search of subset of database 462 * 463 * Revision 6.35 1998/07/28 21:17:59 madden 464 * Added do_not_reevaluate 465 * 466 * Revision 6.34 1998/07/25 14:26:38 madden 467 * Added comments 468 * 469 * Revision 6.33 1998/07/22 12:16:25 madden 470 * Added handle_results 471 * 472 * Revision 6.32 1998/07/21 20:58:04 madden 473 * Changes to allow masking at hash only 474 * 475 * Revision 6.31 1998/07/17 15:39:56 madden 476 * Changes for Effective search space. 
477 * 478 * Revision 6.30 1998/07/14 20:17:05 egorov 479 * Add two new parameters (gilist and gifile) to BLAST_OptionsBlk 480 * 481 * Revision 6.29 1998/06/17 18:10:07 madden 482 * Added isPatternSearch to Options 483 * 484 * Revision 6.28 1998/06/12 16:08:49 madden 485 * BlastHitRange stuff 486 * 487 * Revision 6.27 1998/05/28 19:59:16 madden 488 * Added typedef for BLASTHeapStruct 489 * 490 * Revision 6.26 1998/05/17 16:28:43 madden 491 * Allow changes to filter options and cc filtering. 492 * 493 * Revision 6.25 1998/05/05 13:56:38 madden 494 * Raised version to 2.0.5 and changed date 495 * 496 * Revision 6.24 1998/04/24 19:27:05 madden 497 * Added BlastMatrixRescalePtr 498 * 499 * Revision 6.23 1998/04/01 22:47:14 madden 500 * Added query_invalid flag 501 * 502 * Revision 6.22 1998/03/24 15:38:22 madden 503 * Use BlastDoubleInt4Ptr to keep track of gis and ordinal_ids 504 * 505 * Revision 6.21 1998/03/18 14:14:20 madden 506 * Support random access by gi list 507 * 508 * Revision 6.20 1998/03/14 18:29:21 madden 509 * Added BlastSeqIdListPtr 510 * 511 * Revision 6.19 1998/02/26 22:34:37 madden 512 * Changes for 16 bit windows 513 * 514 * Revision 6.18 1998/02/26 19:10:37 madden 515 * Removed elements with BLAST_COLLECT_SPECIAL_STATS defines 516 * 517 * Revision 6.17 1998/02/24 22:46:29 madden 518 * Added perform_culling Boolean and changed release date 519 * 520 * Revision 6.16 1998/02/19 17:17:10 madden 521 * Use of Int4 rather than Int2 when pruning SeqAlign 522 * 523 * Revision 6.15 1998/01/05 16:46:52 madden 524 * One or both strands can be searched, as opposed to only both, changes to number of contexts 525 * 526 * Revision 6.14 1997/12/23 19:14:14 madden 527 * release version to 2.0.4 528 * 529 * Revision 6.13 1997/12/23 18:12:32 madden 530 * Changes for range-dependent blast 531 * 532 * Revision 6.12 1997/12/12 20:38:02 madden 533 * Fix to comments 534 * 535 * Revision 6.11 1997/12/11 22:20:16 madden 536 * Corrected blast_type defines 537 * 538 * Revision 
6.10 1997/12/10 22:41:40 madden 539 * program number defines 540 * 541 * Revision 6.9 1997/11/14 21:30:16 madden 542 * Changed version and date 543 * 544 * Revision 6.8 1997/10/26 17:26:59 madden 545 * Changes for range dependent limits 546 * 547 * Revision 6.7 1997/10/01 13:35:28 madden 548 * Changed BLAST_VERSION to BLAST_ENGINE_VERSION 549 * 550 * Revision 6.6 1997/09/22 17:36:24 madden 551 * MACROS for position-specific matrices from Andy Neuwald 552 * 553 * Revision 6.5 1997/09/18 22:22:12 madden 554 * Added prune functions 555 * 556 * Revision 6.4 1997/09/11 18:49:26 madden 557 * Changes to enable searches against multiple databases. 558 * 559 * Revision 6.3 1997/09/10 21:27:57 madden 560 * Changes to set CPU limits 561 * 562 * Revision 6.2 1997/09/03 19:06:35 madden 563 * changed BLAST_VERSION and BLAST_RELEASE_DATE 564 * 565 * Revision 6.1 1997/08/27 14:46:48 madden 566 * Changes to enable multiple DB searches 567 * 568 * Revision 6.0 1997/08/25 18:52:32 madden 569 * Revision changed to 6.0 570 * 571 * Revision 1.63 1997/08/20 21:43:10 madden 572 * Updated release date 573 * 574 * Revision 1.62 1997/07/21 17:37:15 madden 575 * Added define for BLAST_RELEASE_DATE 576 * 577 * Revision 1.61 1997/07/18 20:55:45 madden 578 * Added BLAST_VERSION 579 * 580 * Revision 1.60 1997/07/15 20:36:43 madden 581 * Added ValNodePtr mask 582 * 583 * Revision 1.59 1997/07/14 15:33:00 madden 584 * typedef for BlastErrorMsg 585 * 586 * Revision 1.58 1997/05/22 21:24:52 madden 587 * Added support for final gapX dropoff value 588 * 589 * Revision 1.57 1997/05/20 17:51:33 madden 590 * Added element SeqLocPtr query_slp to BlastSearch 591 * 592 * Revision 1.56 1997/05/06 22:19:35 madden 593 * Added use_large_gaps and subject_length 594 * 595 * Revision 1.55 1997/04/09 20:01:53 madden 596 * Added seqid_list to SearchBlk 597 * 598 * Revision 1.54 1997/04/03 19:48:13 madden 599 * Changes to use effective database length instead of the length of each 600 * sequence in statistical 
calculations. 601 * 602 * Revision 1.53 1997/03/31 17:07:57 madden 603 * Added BLAST_COLLECT_STATS define. 604 * 605 * Revision 1.52 1997/03/20 22:56:24 madden 606 * Added gap_info to hsp. 607 * 608 * Revision 1.51 1997/03/14 22:06:11 madden 609 * fixed MT bug in BlastReevaluateWithAmbiguities. 610 * 611 * Revision 1.50 1997/03/08 16:52:16 madden 612 * y 613 * Added discontinuous option to ParameterBlk. 614 * 615 * Revision 1.49 1997/02/25 19:17:05 madden 616 * Added discontinuous flag to options. 617 * 618 * Revision 1.48 1997/02/23 16:44:47 madden 619 * GapAlignBlkPtr added to search structure. 620 * 621 * Revision 1.47 1997/02/20 18:38:34 madden 622 * Added Int4 db_length to Options block. 623 * 624 * Revision 1.46 1997/02/18 21:03:00 madden 625 * Added #define FILTER_NONE 0. 626 * 627 * Revision 1.45 1997/02/17 17:40:18 madden 628 * Added seqalign to ResultHitlistptr 629 * 630 * Revision 1.44 1997/02/11 19:30:54 madden 631 * Added program_name to Options. 632 * 633 * Revision 1.43 1997/02/10 20:27:01 madden 634 * Changed some CharPtr's into Uint1Ptr's. 635 * 636 * Revision 1.42 1997/02/10 20:14:23 madden 637 * replaced doubles by Nlm_FloatHi's. 638 * 639 * Revision 1.41 1997/02/10 20:03:58 madden 640 * Added specific to BlastAllWordsPtr. 641 * 642 * Revision 1.40 1997/02/10 15:36:40 madden 643 * added posConverged to the BlastSearchBlk. 644 * 645 * Revision 1.39 1997/02/06 14:27:15 madden 646 * Addition of BlastAllWord structure. 647 * 648 * Revision 1.38 1997/02/03 13:02:12 madden 649 * Added length to BLASTSubjectInfo. 650 * 651 * Revision 1.37 1997/01/17 17:41:44 madden 652 * Added flags for position based BLAST. 653 * 654 * Revision 1.36 1997/01/13 15:37:05 madden 655 * Changed prototypes for star_callback and tick_callback. 656 * 657 * Revision 1.35 1997/01/11 18:22:10 madden 658 * Changes to allow S2 to be set. 659 * 660 * Revision 1.34 1997/01/09 17:44:35 madden 661 * Added "bit_score" to BLASTResultHsp. 
662 * 663 * Revision 1.33 1996/12/27 20:44:10 madden 664 * Chnages to require that part of the query be included. 665 * 666 * Revision 1.32 1996/12/23 14:04:44 madden 667 * Added gap_trigger. 668 * 669 * Revision 1.31 1996/12/20 21:11:40 madden 670 * Changes to allow multiple hits runs only. 671 * 672 * Revision 1.30 1996/12/18 14:33:13 madden 673 * Added high_score element. 674 * 675 * Revision 1.29 1996/12/17 17:27:03 madden 676 * Count number of attempted gappings. 677 * 678 * Revision 1.28 1996/12/17 13:47:57 madden 679 * Added star_proc. 680 * 681 * Revision 1.27 1996/12/16 14:35:48 madden 682 * Added gapped_calculation Boolean 683 * 684 * Revision 1.26 1996/12/13 22:00:23 madden 685 * Corrected starting point for gapped extension with traceback. 686 * 687 * Revision 1.25 1996/12/13 18:13:56 madden 688 * Added tick callback functions 689 * 690 * Revision 1.24 1996/12/13 15:09:31 madden 691 * Changes to parameters used for gapped extensions. 692 * 693 * Revision 1.23 1996/12/09 23:24:05 madden 694 * Added parameters to control which sequences get a gapped alignment. 695 * 696 * Revision 1.22 1996/12/08 15:19:59 madden 697 * Added parameters for gapped alignments. 698 * 699 * Revision 1.21 1996/11/27 21:56:57 madden 700 * Removed define for XNU. 701 * 702 * Revision 1.20 1996/11/18 18:07:57 madden 703 * *** empty log message *** 704 * 705 * Revision 1.19 1996/11/18 17:28:13 madden 706 * Added BLAST_SEARCH_ALLOC_TRANS_INFO define. 707 * 708 * Revision 1.18 1996/11/18 15:45:40 madden 709 * Defines for filter type added (by S. Shavirin),. 710 * 711 * Revision 1.17 1996/11/15 17:54:54 madden 712 * Added support for alternate genetic codes for blastx, tblast[nx]. 
713 * 714 * Revision 1.16 1996/11/13 22:35:18 madden 715 * Added genetic_code and db_genetic_code elements to blastdef.h 716 * 717 * Revision 1.15 1996/11/12 16:21:53 madden 718 * Added context_factor 719 * 720 * Revision 1.14 1996/11/06 22:10:01 madden 721 * translation_buffer changed from CharPtr to Uint1Ptr. 722 * 723 * Revision 1.13 1996/11/04 16:59:43 madden 724 * Added translation_table and translation_table_rc elements 725 * to BlastSearchBlk. 726 * 727 * Revision 1.12 1996/10/03 20:49:29 madden 728 * Added xsum member to HSP_Link structure. 729 * ,. 730 * 731 * Revision 1.11 1996/10/01 21:24:02 madden 732 * Added e2. 733 * 734 * Revision 1.10 1996/09/26 13:02:32 madden 735 * Removed ifdef for BLAST_COLLECT_STATS with counters. 736 * 737 * Revision 1.9 1996/09/12 21:13:46 madden 738 * *** empty log message *** 739 * 740 * Revision 1.8 1996/09/11 22:21:51 madden 741 * *** empty log message *** 742 * 743 * Revision 1.7 1996/09/11 19:14:09 madden 744 * Added BLAST_OptionsBlkPtr structure and use thereof. 745 * 746 * Revision 1.6 1996/08/14 18:16:13 madden 747 * removed frame from Context. 748 * 749 * Revision 1.5 1996/08/14 17:19:02 madden 750 * Added frame to BlastSeqBlkPtr. 751 * 752 * Revision 1.4 1996/08/13 15:26:29 madden 753 * Changes for tblastn. 754 * 755 * Revision 1.3 1996/08/09 22:11:12 madden 756 * Added original_sequence to BlastSequenceBlk. 757 * 758 * Revision 1.2 1996/08/07 14:24:42 madden 759 * Removed include for blast18p.h and objblst2.h 760 * 761 * Revision 1.1 1996/08/05 20:32:18 madden 762 * Initial revision 763 * 764 * Revision 1.51 1996/08/02 14:20:06 madden 765 * Removed multiproc strucutre. 766 * 767 * Revision 1.50 1996/07/31 13:09:17 madden 768 * Changes for threaded blast. 769 * 770 * Revision 1.49 1996/07/24 12:01:28 madden 771 * Changes for blastx 772 * 773 * Revision 1.48 1996/07/18 22:00:49 madden 774 * Addition of BLAST_ExtendWordParams structure. 
775 * 776 * Revision 1.47 1996/07/18 13:36:34 madden 777 * Addition of the BLASTContextStructPtr. 778 * 779 * Revision 1.46 1996/07/16 14:37:42 madden 780 * Removed _blast_link_structure . 781 * 782 * Revision 1.45 1996/07/11 16:03:58 madden 783 * SaveCurrentHitlist keeps track of which set an HSP belongs to. 784 * 785 * Revision 1.44 1996/07/02 14:33:16 madden 786 * Added hspcnt_max. 787 * 788 * Revision 1.43 1996/07/02 12:04:15 madden 789 * HSP's saved on array, rather than linked list. 790 * 791 * Revision 1.42 1996/06/26 19:38:12 madden 792 * Removed ifdef. 793 * 794 * Revision 1.41 1996/06/24 20:26:46 madden 795 * Added dropoff_1st_pass and dropoff_2nd_pass to ParameterBlkPtr. 796 * 797 * Revision 1.40 1996/06/24 17:58:21 madden 798 * Removed X_set parameter, added right and left dropoff's. 799 * 800 * Revision 1.39 1996/06/20 16:15:57 madden 801 * Replaced int's with Int4's. 802 * 803 * Revision 1.38 1996/06/19 14:19:53 madden 804 * Added define for BLASTSubjectInfoPtr. 805 * 806 * Revision 1.37 1996/06/17 19:03:07 madden 807 * Rmoved unused structure. 808 * 809 * Revision 1.36 1996/06/14 17:58:13 madden 810 * Changes to avoid nulling out arrays for every sequence. 811 * 812 * Revision 1.35 1996/06/13 21:03:06 madden 813 * Added actual_window element to ExtendWord structure. 814 * 815 * Revision 1.34 1996/06/11 17:58:31 madden 816 * Changes to allow shorter arrays for multiple hits type blast. 817 * 818 * Revision 1.33 1996/06/10 16:52:16 madden 819 * Use bit-shifting and masking instead of dividing and remainder. 820 * 821 * Revision 1.32 1996/06/10 13:44:07 madden 822 * Changes to reduce the size of the "already visited" array. 823 * 824 * Revision 1.31 1996/06/06 17:55:16 madden 825 * Added number_of_bits to ParameterBlkPtr. 826 * 827 * Revision 1.30 1996/06/06 13:23:17 madden 828 * Added elements cutoff_big_gap and ignore_small_gaps to ParameterBlkPt. 829 * 830 * Revision 1.29 1996/05/29 12:44:04 madden 831 * Added structure BlastTimeKeeper. 
832 * 833 * Revision 1.28 1996/05/28 14:16:32 madden 834 * Added Int4's to collect statistics info. 835 * 836 * Revision 1.27 1996/05/23 21:55:04 madden 837 * Removed unused variable initlen 838 * 839 * Revision 1.26 1996/05/23 21:48:23 madden 840 * Removed unused defines. 841 * 842 * Revision 1.25 1996/05/16 19:51:09 madden 843 * Added documentation block. 844 * 845 * Revision 1.24 1996/05/16 13:29:38 madden 846 * Added defines for contiguous or discontiguous calls. 847 * 848 * Revision 1.23 1996/05/01 15:00:00 madden 849 * Added BlastResults sturcture defs. 850 * 851 * Revision 1.22 1996/04/24 16:17:26 madden 852 * Added new structure, BLAST_Link. 853 * 854 * Revision 1.21 1996/04/24 12:52:48 madden 855 * ID's for sequences simplified. 856 * 857 * Revision 1.20 1996/04/03 19:14:35 madden 858 * Removed defunct HSP ptr's. 859 * 860 * Revision 1.19 1996/03/29 21:27:43 madden 861 * "hitlist" now kept on SeqAlign rather than HitList. 862 * 863 * Revision 1.17 1996/03/27 19:51:53 madden 864 * "current_hitlist" added to Search Structure. 865 * 866 * Revision 1.16 1996/03/26 19:36:59 madden 867 * Added ReadDBFILEPtr to Search structure. 868 * 869 * Revision 1.15 1996/03/25 16:35:18 madden 870 * Added old_stats. 871 * 872 * Revision 1.14 1996/02/28 21:37:43 madden 873 * Added "trim" variables to segments for HSP. 874 * 875 * Revision 1.13 1996/02/06 22:51:13 madden 876 * Added "prelim" to BlastSearch 877 * 878 * Revision 1.12 1996/02/02 19:25:32 madden 879 * Added wfp_first and wfp_second to BlastParameterBlk for first and second pass. 880 * 881 * Revision 1.11 1996/01/29 21:12:07 madden 882 * *** empty log message *** 883 * 884 * Revision 1.10 1996/01/23 16:31:47 madden 885 * e_cutoff changed from BLAST_Score to double in ParameterBlk. 886 * 887 * Revision 1.9 1996/01/17 17:00:40 madden 888 * Added gap parameters to ParameterBlk, dblen to SearchBlk. 889 * 890 * Revision 1.8 1996/01/17 13:45:58 madden 891 * Added gap_prob and gap_decay_rate to ParameterBlk. 
892 *
893 * Revision 1.7 1996/01/11 15:17:36 madden
894 * Added process_num to ParameterBlk.
895 *
896 * Revision 1.6 1996/01/08 23:23:55 madden
897 * removed "len" from HSP.
898 *
899 * Revision 1.5 1996/01/06 18:57:47 madden
900 * Added BLAST_HSP_LINK structure.
901 *
902 * Revision 1.4 1995/12/28 21:26:05 madden
903 * *** empty log message ***
904 *
905 * Revision 1.3 1995/12/26 23:04:14 madden
906 * Added parameters to BlastParameterBlk.
907 *
908 * Revision 1.2 1995/12/21 23:10:41 madden
909 * BLAST_Score prototypes moved to blastkar.h.
910 *
911 * Revision 1.1 1995/12/19 22:33:06 madden
912 * Initial revision
913 *
914 * Revision 1.1 1995/12/08 15:48:23 madden
915 * Initial revision
916 *
917 * */
/* Include guard for blastdef.h. */
918 #ifndef __BLASTSTR__
919 #define __BLASTSTR__
920
/* These headers are all included before the extern "C" block is opened. */
921 #include <ncbi.h>
922 #include <lookup.h>
923 #include <blastkar.h>
924 #include <objalign.h>
925 #include <sequtil.h>
926 #include <readdb.h>
927 #include <gapxdrop.h>
928 #include <mbalign.h>
929
/* When compiled as C++, give the declarations that follow C linkage. */
930 #ifdef __cplusplus
931 extern "C" {
932 #endif
933
934 /* Defines for program numbers (translated in BlastGetProgramNumber).
   The values run from 0 (undefined) through 6. */
935 #define blast_type_undefined 0
936 #define blast_type_blastn 1
937 #define blast_type_blastp 2
938 #define blast_type_blastx 3
939 #define blast_type_tblastn 4
940 #define blast_type_tblastx 5
941 #define blast_type_psitblastn 6
942
943
944 /* Defines for strand_option; they determine which strand of the query to
   compare. BLAST_BOTH_STRAND (3) equals BLAST_TOP_STRAND | BLAST_BOTTOM_STRAND. */
945 #define BLAST_TOP_STRAND 1
946 #define BLAST_BOTTOM_STRAND 2
947 #define BLAST_BOTH_STRAND 3
948
949 /* Defines that specify whether or not BLAST should delete some memory, or
950 leave it up to the caller.
951 */
952 #define BLAST_OWN 0
953 #define BLAST_NOT_OWN 1
954
955 /* Specifies minimum search space size for an awake thread.
   The value is a floating-point constant (2e12). */
956 #define AWAKE_THR_MIN_SIZE 2000000000000.0
957
/* blastconcat.h is included only when _BLASTCONCAT_ is not already defined
   (presumably that header's own guard macro -- verify).
   NOTE(review): unlike the headers at the top of this file, this include is
   reached after the extern "C" block has been opened; confirm that
   blastconcat.h is meant to be processed with C linkage in effect. */
958 #ifndef _BLASTCONCAT_
959 #include "blastconcat.h"
960 #endif
961 /* --KM concat */
962
963 /* Some default values (used when creating blast options block and for
964 * command-line program defaults). When changing these defaults, please
965 * remember to update the defaults in the command-line programs. */
966 #define WINDOW_SIZE_PROT 40
967 #define WINDOW_SIZE_NUCL 0
968 #define WINDOW_SIZE_MEGABLAST 0
969
970 #define WORDSIZE_PROT 3
971 #define WORDSIZE_NUCL 11
972 #define WORDSIZE_MEGABLAST 28
973
974 /* Protein gap costs are the defaults for the BLOSUM62 scoring matrix.
975 * More gap costs are listed in BLASTOptionSetGapParams. */
976 #define GAP_OPEN_PROT 11
977 #define GAP_OPEN_NUCL 5
978 #define GAP_OPEN_MEGABLAST 0
979
980 #define GAP_EXTN_PROT 1
981 #define GAP_EXTN_NUCL 2
982 #define GAP_EXTN_MEGABLAST 0
983
/* Word thresholds, one per program. */
984 #define WORD_THRESHOLD_BLASTP 11
985 #define WORD_THRESHOLD_BLASTN 0
986 #define WORD_THRESHOLD_BLASTX 12
987 #define WORD_THRESHOLD_TBLASTN 13
988 #define WORD_THRESHOLD_TBLASTX 13
989 #define WORD_THRESHOLD_MEGABLAST 0
990
991 #define UNGAPPED_X_DROPOFF_PROT 7
992 #define UNGAPPED_X_DROPOFF_NUCL 20
993 #define UNGAPPED_X_DROPOFF_MEGABLAST 10
994
995 #define GAP_X_DROPOFF_PROT 15
996 #define GAP_X_DROPOFF_NUCL 30
997 #define GAP_X_DROPOFF_MEGABLAST 20
998 #define GAP_X_DROPOFF_TBLASTX 0
999
/* NOTE(review): this group has no _MEGABLAST entry, unlike GAP_X_DROPOFF_*
   above -- confirm that is intentional. */
1000 #define GAP_X_DROPOFF_FINAL_PROT 25
1001 #define GAP_X_DROPOFF_FINAL_NUCL 50
1002 #define GAP_X_DROPOFF_FINAL_TBLASTX 0
1003
1004 /* reward and penalty only apply to blastn/megablast */
/* NOTE(review): PENALTY expands to -3 without enclosing parentheses; keep
   that in mind when using it inside a larger expression. */
1005 #define PENALTY -3
1006 #define REWARD 1
1007
1008 /********************************************************************
1009 *
1010 * define for collecting BLAST stats.
1011 * 1012 ***********************************************************************/ 1013 1014 #define BLAST_COLLECT_STATS 1015 1016 /******************************************************************** 1017 * 1018 * Structure to save timing info. in. Right now this only 1019 * works for UNIX. 1020 * 1021 ********************************************************************/ 1022 1023 typedef struct _blast_time_keeper { 1024 FloatLo user, /* CPU time in user space of the process. */ 1025 system, /* CPU time used by system. */ 1026 total; /* total CPU time (i.e., both of the above). */ 1027 } BlastTimeKeeper, PNTR BlastTimeKeeperPtr; 1028 1029 1030 /*************************************************************************** 1031 Macros added by Andy Neuwald in order to allow easy modification of matrices. 1032 ***************************************************************************/ 1033 1034 #define MtrxScorePosSearch(S,x,y) ((S)->posMatrix[(x)][(y)]) 1035 #define PtrMtrxScorePosSearch(S,x) ((S)->posMatrix[(x)]) 1036 1037 /***** 1038 #define MtrxScorePosSearchi2(S,x,y) \ 1039 ((S)->posMatrix[( (x) %(S)->query_length)][(y)]) 1040 #define PtrMtrxScorePosSearch2(S,x) \ 1041 ((S)->posMatrix[( (x) %(S)->query_length)]) 1042 *****/ 1043 1044 /******************************************************************** 1045 1046 Defines for discontiguous word hits on 1st and 2nd pass. 1047 1048 ********************************************************************/ 1049 1050 #define BLAST_NO_PASS_DISCONTIG 0 1051 #define BLAST_1ST_PASS_DISCONTIG 1 1052 #define BLAST_2ND_PASS_DISCONTIG 2 1053 #define BLAST_BOTH_PASS_DISCONTIG 3 1054 1055 #define CODON_LENGTH 3 /* three is always the codon length. 
*/ 1056 1057 #define BLAST_SMALL_GAPS 0 1058 #define BLAST_LARGE_GAPS 1 1059 #define MAX_INTRON_LENGTH 4000 1060 #define MAX_DBSEQ_LEN 5000000 1061 1062 /********************************************************************* 1063 Filter types definitions 1064 *********************************************************************/ 1065 1066 #define FILTER_NONE 0 1067 #define FILTER_DUST 1 1068 #define FILTER_SEG 2 1069 1070 typedef enum { 1071 MB_WORD_CODING = 0, 1072 MB_WORD_OPTIMAL = 1, 1073 MB_TWO_TEMPLATES = 2 1074 } MBDiscWordType; 1075 1076 /********************************************************************** 1077 Structure for the blast options (available to user/programmer). 1078 This should be filled in by the "Main" program before blast 1079 is called. 1080 1081 If changes are made to this structure, corresponding changes should 1082 likely be made to BLAST_WizardOptionsBlk and BLAST_WizardOptionsMask. 1083 ***********************************************************************/ 1084 1085 typedef struct _blast_optionsblk { 1086 Nlm_FloatHi gap_decay_rate, /* decay rate. */ 1087 gap_prob; /* Prob of decay. */ 1088 Int4 gap_size, /* Small gap size. */ 1089 window_size,/* Multiple Hits window size (zero for single hit algorithm) */ 1090 threshold_first, /* Threshold for extending hits (preliminary pass), zero if one-pass algorithm is used. */ 1091 threshold_second;/* Threshold for extending hits (second pass) */ 1092 Nlm_FloatHi expect_value, /* Expectation value (E) */ 1093 e2; /* Expect value for a single HSP */ 1094 /* These two scores are zero, unless they've been set, then they set 1095 the expect_value and e2 above. */ 1096 Int4 cutoff_s, /* score corresponds to expect_value above.*/ 1097 cutoff_s2; /* score corresponds to e2 above. */ 1098 Boolean two_pass_method; /* should two passes be used? */ 1099 Boolean multiple_hits_only; /* Only the multiple hits alg. used. */ 1100 Int4 hitlist_size; /* How many hits should be returned. 
*/ 1101 Nlm_FloatHi number_of_bits; /* Number of bits to initiate 2nd pass (default is used if zero) */ 1102 Nlm_FloatHi dropoff_1st_pass, /* dropoff ("X") used for 1st pass. */ 1103 dropoff_2nd_pass; /* dropoff ("X") used for 2nd pass. */ 1104 Int2 number_of_cpus; /* How many CPU's. */ 1105 CharPtr matrix; /* name of matrix to use. */ 1106 Boolean old_stats; /* Use old stats (option may disappear later) */ 1107 Boolean do_sum_stats; /* Should sum statistics be used? */ 1108 Boolean use_large_gaps; /* Use only large gaps for linking HSP's with sum stats. */ 1109 Int2 wordsize; /* size of word used to find hits. */ 1110 Int2 penalty, reward; /* penalty and reward, only for blastn */ 1111 /* The ID numbers from gc.prt are used for the genetic codes. */ 1112 Int4 genetic_code, /* genetic code for query (blastx, tblastx) */ 1113 db_genetic_code; /* genetic code for db (tblast[nx]). */ 1114 Int4 filter; /* filter type 0 mean no filter 1115 non-zero value indicate filer type */ 1116 CharPtr filter_string; /* String specifying the type of filtering and filter options. */ 1117 Boolean gapped_calculation; /* Is a gapped calc. being done? */ 1118 /* The next three are used ONLY for gapped alignments. */ 1119 Int4 gap_open, /* Cost to open a gap (NO extension). */ 1120 gap_extend; /* Cost to extend a gap one letter. */ 1121 Nlm_FloatHi gap_x_dropoff, /* X-dropoff (in bits) used by Gapped align routine. */ 1122 gap_x_dropoff_final; /* X-dropoff (in bits) used by Gapped align routine for FINAL alignment. */ 1123 Int4 decline_align; /* Cost for declining alignment */ 1124 Nlm_FloatHi gap_trigger; /* Score (in bits) to gap, if an HSP gaps well. */ 1125 1126 Boolean discontinuous; /* Should the SeqAlign be discontinuous.*/ 1127 /* What region of the query is required for the alignment. If start is 1128 zero and end is -1 (the entire query), then these are not checked. */ 1129 Int4 required_start, 1130 required_end; 1131 Int8 db_length; /* database size used for stat. calcul. 
*/ 1132 Int4 dbseq_num; /* number of database sequences used for stat. calcul. */ 1133 Nlm_FloatHi searchsp_eff; /* Effective search space to be used. */ 1134 1135 /* Options for postion based blast. */ 1136 Nlm_FloatHi ethresh; 1137 Int4 maxNumPasses, 1138 pseudoCountConst; 1139 CharPtr program_name; /* program name, for reference. */ 1140 Int4 cpu_limit; /* timeout total. */ 1141 /* Used for region-dependent limits when storing hits. */ 1142 Int4 hsp_range_max, /* maximum hits for a range */ 1143 block_width; /* width of a block */ 1144 Boolean perform_culling; /* Should results be culled at all? */ 1145 Boolean isPatternSearch; /* Is this a use of PHI-BLAST?*/ 1146 CharPtr gifile; /* name of file containing list of gis on server */ 1147 ValNodePtr gilist; /* list of gis specified by client */ 1148 Boolean do_not_reevaluate; /* Don't perform BlastReevaluateWithAmbiguities. */ 1149 /* These options allow a subset of the database to be examined. IF they 1150 are set to zero, then the entire database is examined. */ 1151 Int4 first_db_seq, /* 1st sequence in db to be compared. */ 1152 final_db_seq; /* Final sequence to be compared. */ 1153 CharPtr entrez_query; /* user specified Entrez query to make selection from databases */ 1154 CharPtr org_name; /* user specified name of organizm; corresponding .gil file will be used */ 1155 Uint1 strand_option; /* BLAST_TOP_STRAND, BLAST_BOTTOM_STRAND, or BLAST_BOTH_STRAND. used by blast[nx] and tblastx */ 1156 Int4 hsp_num_max; /* maximum number of HSP's allowed. Zero indicates no limit. */ 1157 Uint1 tweak_parameters, /* For composition-based statistics. */ 1158 smith_waterman; 1159 Boolean unified_p; /* use a combination of alignment and 1160 compositional p-values when evaluating 1161 significance; ignored unless 1162 composition-based statisics is on. */ 1163 CharPtr phi_pattern; /* Pattern for PHI-Blast search */ 1164 Boolean use_real_db_size; /* Use real DB size. 
meant for use if a list of gis' is submitted, 1165 but statistics should be based upon the real database. */ 1166 Boolean use_best_align; /* option is to use alignments choosen by user in PSM computation API (used in WWW PSI-Blast); */ 1167 Int4 max_num_patterns; /* Maximum number of patterns to be used in PHI-Blast search */ 1168 Boolean is_megablast_search; /* Is this a MegaBlast search? */ 1169 Uint1 no_traceback; /* No traceback in MegaBLAST extension */ 1170 Boolean is_rps_blast; /* If this RPS Blast ? */ 1171 SeqLocPtr query_lcase_mask; /* Masking of input DNA regions */ 1172 Boolean sort_gi_list; /* Should the gi list be sorted? */ 1173 Boolean is_neighboring; /* Is this a neighboring task? */ 1174 Nlm_FloatHi kappa_expect_value; /* E-value threshold for 1175 hits to be saved when 1176 RedoAlignmentCore is used 1177 to compute final alignments; 1178 should equal expect_value for 1179 other types of alignment. */ 1180 Boolean explode_seqids; /* make one SeqAlign for every gi on a 1181 redundant sequence. */ 1182 Boolean megablast_full_deflines; /* Print full deflines in 1183 megablast one-line output */ 1184 Boolean is_ooframe; /* Use Out-Of-Frame gapping algorithm */ 1185 Int4 shift_pen; /* Out-Of-Frame shift penalty */ 1186 Boolean gilist_already_calculated; /* translation of gis to ordinalID's already done (used for neighboring). */ 1187 Boolean recoverCheckpoint; /* For psitblastn */ 1188 Boolean freqCheckpoint; /* For psitblastn */ 1189 CharPtr CheckpointFileName; /* For psitblastn */ 1190 Int4 longest_intron; /* the length of longest intron for linking HSPs */ 1191 FloatLo perc_identity; /* Identity percentage cut-off */ 1192 VoidPtr output; /* Output stream to put results to */ 1193 FloatHi scalingFactor; /* scaling factor used when constructing pssm for rpsblast. */ 1194 Int4 total_hsp_limit; /* total number of HSP's that will be processed to SeqAligns, zero means no limit. 
*/ 1195 Boolean mb_one_base_step; /* Scan every base of the database */ 1196 Int2 mb_template_length; /* Length of the discontiguous word */ 1197 Boolean mb_use_dyn_prog; /* Use dynamic programming gapped extension in 1198 megablast with affine gap scores */ 1199 MBDiscWordType mb_disc_type; 1200 Uint4 NumQueries; /*--KM for query concatenation in [t]blastn */ 1201 Boolean ignore_gilist; /* Used in traceback stage to not lookup gi's */ 1202 } BLAST_OptionsBlk, PNTR BLAST_OptionsBlkPtr; 1203 1204 1205 /* -------------------------------------------------------------------- 1206 * 1207 * BLAST_WizardOptionsBlk contains those fields of BLAST_OptionsBlk 1208 * that a user can set. 1209 * 1210 * BLAST_WizardOptionsMask contains a Boolean for each field defined in 1211 * BLAST_WizardOptionsBlk, except those holding pointers. TRUE means 1212 * that the corresponding field in BLAST_WizardOptionsBlk is set. 1213 * 1214 * These structures are used only in conjunction with BLAST_Wizard. 1215 * 1216 * -------------------------------------------------------------------- 1217 */ 1218 1219 struct _blast_wizardoptionsblk { 1220 Int4 block_width; 1221 Int4 cutoff_s; 1222 Int4 db_genetic_code; 1223 CharPtr entrez_query; 1224 Nlm_FloatHi ethresh; 1225 Nlm_FloatHi expect_value; 1226 CharPtr filter_string; 1227 Int4 first_db_seq; 1228 Int4 final_db_seq; 1229 Int4 gap_extend; 1230 Int4 gap_open; 1231 Boolean gapped_calculation; 1232 Int4 genetic_code; 1233 ValNodePtr gilist; 1234 Int4 hitlist_size; 1235 Int4 hsp_range_max; 1236 Boolean is_ooframe; 1237 CharPtr matrix; 1238 MBDiscWordType mb_disc_type; 1239 Int2 mb_template_length; 1240 Uint1 no_traceback; 1241 Int2 penalty; 1242 FloatLo perc_identity; 1243 Boolean perform_culling; 1244 CharPtr phi_pattern; 1245 Int4 pseudoCountConst; 1246 SeqLocPtr query_lcase_mask; 1247 Int4 required_end; 1248 Int4 required_start; 1249 Int2 reward; 1250 Int8 db_length; 1251 Nlm_FloatHi searchsp_eff; 1252 Boolean smith_waterman; 1253 Uint1 strand_option; 
1254 Int4 threshold_first; 1255 Int4 threshold_second; 1256 Uint1 tweak_parameters; 1257 Boolean use_best_align; 1258 Boolean use_real_db_size; 1259 Int4 window_size; 1260 Int2 wordsize; 1261 1262 Boolean two_hits; 1263 CharPtr string_options; 1264 }; 1265 1266 typedef struct _blast_wizardoptionsblk 1267 BLAST_WizardOptionsBlk, 1268 PNTR BLAST_WizardOptionsBlkPtr; 1269 1270 struct _blast_wizardoptionsmask { 1271 Boolean block_width; 1272 Boolean cutoff_s; 1273 Boolean db_genetic_code; 1274 Boolean ethresh; 1275 Boolean expect_value; 1276 Boolean first_db_seq; 1277 Boolean final_db_seq; 1278 Boolean gap_extend; 1279 Boolean gap_open; 1280 Boolean gapped_calculation; 1281 Boolean genetic_code; 1282 Boolean hitlist_size; 1283 Boolean hsp_range_max; 1284 Boolean is_ooframe; 1285 Boolean mb_disc_type; 1286 Boolean mb_template_length; 1287 Boolean no_traceback; 1288 Boolean penalty; 1289 Boolean perc_identity; 1290 Boolean perform_culling; 1291 Boolean pseudoCountConst; 1292 Boolean required_end; 1293 Boolean required_start; 1294 Boolean reward; 1295 Boolean db_length; 1296 Boolean searchsp_eff; 1297 Boolean smith_waterman; 1298 Boolean strand_option; 1299 Boolean threshold_first; 1300 Boolean threshold_second; 1301 Boolean tweak_parameters; 1302 Boolean use_best_align; 1303 Boolean use_real_db_size; 1304 Boolean window_size; 1305 Boolean wordsize; 1306 1307 Boolean two_hits; 1308 }; 1309 1310 typedef struct _blast_wizardoptionsmask 1311 BLAST_WizardOptionsMask, 1312 PNTR BLAST_WizardOptionsMaskPtr; 1313 1314 typedef enum { 1315 TEMPL_11_16 = 0, 1316 TEMPL_12_16 = 1, 1317 TEMPL_11_18 = 2, 1318 TEMPL_12_18 = 3, 1319 TEMPL_11_21 = 4, 1320 TEMPL_12_21 = 5, 1321 TEMPL_11_16_OPT = 6, 1322 TEMPL_12_16_OPT = 7, 1323 TEMPL_11_18_OPT = 8, 1324 TEMPL_12_18_OPT = 9, 1325 TEMPL_11_21_OPT = 10, 1326 TEMPL_12_21_OPT = 11, 1327 TEMPL_ERROR = -1 1328 } MBTemplateType; 1329 1330 typedef struct _mb_parameter_blk_ { 1331 Uint1 no_traceback; /* No traceback in greedy extension */ 1332 
Boolean is_neighboring; /* Is this a neighboring task? */ 1333 Boolean full_seqids; /* Print full seqids in tabular output? */ 1334 FloatLo perc_identity; /* Identity percentage cut-off */ 1335 Int4 max_positions; /* Maximal number of positions in query of a given word */ 1336 Boolean disc_word; /* Use a discontiguous word template to find initial 1337 matches */ 1338 Boolean one_base_step; /* Form words for every position in the database 1339 sequence (default is every 4th position) */ 1340 Int2 word_weight; /* Number of identical nucleotides in a word match */ 1341 Int2 template_length; /* Length of a discontiguous word template */ 1342 Boolean use_dyn_prog; /* Use dynamic programming extension for affine gap 1343 scores */ 1344 MBTemplateType template_type; /* Type of a discontiguous template */ 1345 Boolean use_two_templates; 1346 } MegaBlastParameterBlk, PNTR MegaBlastParameterBlkPtr; 1347 1348 /**************************************************************************** 1349 1350 PARAMETER BLOCK: parameters for the BLAST search entered by on 1351 command line by user. 1352 1353 *****************************************************************************/ 1354 1355 typedef struct _blast_parameterblk { 1356 BLAST_Score threshold, /* threshold for extending a word hit*/ 1357 threshold_first, /* threshold for 1st pass. */ 1358 threshold_second, /* threshold for 2nd pass. */ 1359 X, /* drop-off score for extension. */ 1360 dropoff_1st_pass, /* dropoff ("X") used for 1st pass. */ 1361 dropoff_2nd_pass, /* dropoff ("X") used for 2nd pass. */ 1362 cutoff_s, /* Final Score to report a hit. */ 1363 cutoff_s1, /* Score to save an HSP after a gapped extension. */ 1364 cutoff_s2, /* Score to save an HSP after an ungapped extension. */ 1365 cutoff_s_first, /* Score (S2) to use on 1st pass */ 1366 cutoff_s_second, /* Score (S2) to use on 2nd pass and 1367 for "small" gaps in link_hsps (in blast.c) */ 1368 /* Max value of s2, used if s2 is set or s2 becomes larger than s. 
*/ 1369 cutoff_s2_max, 1370 cutoff_big_gap; /* cutoff value for a "big" gap in 1371 link_hsps (in blast.c). */ 1372 Nlm_FloatHi cutoff_e, /* Expect value to report a hit. */ 1373 cutoff_e2, /* Expect value to report a hsp. */ 1374 number_of_bits; /* number of bits of significance, used 1375 to calculate cutoff_s_first (above). */ 1376 Boolean threshold_set, /*TRUE if threshold set on command-line*/ 1377 cutoff_s_set, /* TRUE if cutoff score set on c-l */ 1378 cutoff_s2_set, /* TRUE if cutoff score2 set on c-l */ 1379 cutoff_e_set, /* TRUE if cutoff expect set on c-l */ 1380 cutoff_e2_set, /* TRUE if cutoff expect2 set on c-l */ 1381 ignore_small_gaps, /* ignore small gaps if TRUE, set by 1382 CalculateSecondCutoffScore in blast.c if the search 1383 space is smalled than 8*gap_size*gap_size. */ 1384 window_size_set;/* TRUE if window size set for MHBLAST*/ 1385 Boolean sump_option; /* TRUE if sump is used. */ 1386 Int4 gap_size, /* max. gap allowed for small gaps.*/ 1387 window_size; /* used for multiple hits BLAST. */ 1388 Nlm_FloatHi gap_prob; /* prob. of gap of size "gap" (above).*/ 1389 Nlm_FloatHi gap_decay_rate; /* prob. of only one HSP */ 1390 Int2 process_num; /* max # processrs permitted (for MP).*/ 1391 Boolean old_stats; /* Use "old" stats if TRUE. */ 1392 Boolean do_sum_stats; /* Should sum statistics be used? */ 1393 Boolean use_large_gaps; /* Use only large gaps for linking HSP's with sum stats. */ 1394 Boolean two_pass_method; /* should two passes be used? */ 1395 Boolean multiple_hits_only; /* Only the multiple hits alg. used. */ 1396 Boolean discontinuous; /* Should discontinuous SeqAlign's be produced? */ 1397 Boolean gapped_calculation; /* Is a gapped calc. being done? */ 1398 Boolean do_not_reevaluate; /* Don't perform BlastReevaluateWithAmbiguities. */ 1399 /* The next three are used ONLY for gapped alignments. */ 1400 Int4 gap_open, /* Cost to open a gap (NO extension). */ 1401 gap_extend, /* Cost to extend a gap one letter. 
*/ 1402 gap_x_dropoff, /* X-dropoff used by Gapped align routine. */ 1403 gap_x_dropoff_final; /* X-dropoff (in bits) used by Gapped align routine for FINAL alignment. */ 1404 Int4 decline_align; /* Cost for declining alignment */ 1405 1406 Nlm_FloatHi gap_trigger; /* Score (in bits) to gap, if an HSP gaps well.*/ 1407 1408 /* Options for postion based blast. */ 1409 Nlm_FloatHi ethresh; 1410 Int4 maxNumPasses, 1411 pseudoCountConst; 1412 Int4 cpu_limit; /* timeout total. */ 1413 Int4 hsp_range_max, /* maximum hits for a range */ 1414 max_pieces; /* Max number of pieces allowed (query_length/block_width) */ 1415 Boolean perform_culling; /* determines whether culling should be used or not. 1416 If not, then hsp_range_max, block_width, and max_pieces are ignored. */ 1417 /* These options allow a subset of the database to be examined. IF they 1418 are set to zero, then the entire database is examined. */ 1419 Int4 first_db_seq, /* 1st sequence in db to be compared. */ 1420 final_db_seq; /* Final sequence to be compared. */ 1421 Int4 hsp_num_max; /* maximum number of HSP's allowed. Zero indicates no limit. */ 1422 Boolean use_best_align; /* option is to use alignments choosen by user in PSM computation API (used in WWW PSI-Blast); */ 1423 MegaBlastParameterBlkPtr mb_params; /* Is this a MegaBlast search? */ 1424 CharPtr filter_string; /* String specifying the type of filtering and filter options. - used with Translated RPS Blast */ 1425 Boolean is_rps_blast; /* If this RPS Blast ? */ 1426 SeqLocPtr query_lcase_mask; /* Masking of input DNA regions */ 1427 Boolean explode_seqids; /* make one SeqAlign for every gi on a 1428 redundant sequence. */ 1429 Boolean is_ooframe; /* Use Out-Of-Frame gapping algorithm */ 1430 Int4 shift_pen; /* Out-Of-Frame shift penalty */ 1431 Int4 longest_intron; /* the length of longest intron for linking HSPs */ 1432 FloatHi scalingFactor; /* scaling factor used when constructing pssm for rpsblast. 
*/ 1433 Int4 total_hsp_limit; /* total number of HSP's that will be processed to SeqAligns, zero means no limit. */ 1434 } BLAST_ParameterBlk, PNTR BLAST_ParameterBlkPtr; 1435 1436 typedef Nlm_Int4 BLAST_Diag, PNTR BLAST_DiagPtr; 1437 1438 /* Structure to keep track of the last hit and diag level. */ 1439 1440 typedef struct cfj_mod_struct{ 1441 Int4 last_hit; 1442 Int4 diag_level; 1443 } CfjModStruct; 1444 /* 1445 BLAST_ExtendWord contains information about which diagonals 1446 have been extended over (i.e., which diagonals have been 1447 tested). This structure will be duplicated once for each 1448 context as every context is different. 1449 */ 1450 typedef struct _blast_extend_word { 1451 Int4Ptr _buffer; /* The "real" buffer for diag_level, version, 1452 and last_hit arrays. */ 1453 CfjModStruct *combo_array; 1454 Int4Ptr version; /* still needed?? */ 1455 Int4 actual_window; /* The actual window used if the multiple 1456 hits method was used and a hit was found. */ 1457 } BLAST_ExtendWord, PNTR BLAST_ExtendWordPtr; 1458 1459 /* 1460 BLAST_ExtendWordParams contains parameters about the extensions. 1461 Only one copy of this structure is needed, regardless of how many 1462 contexts there are. 1463 */ 1464 typedef struct _blast_extend_word_params { 1465 Int4 bits_to_shift; /* how many bits should the diagonal be 1466 shifted to get the "version" */ 1467 Int4 min_diag_length, /* Min. length of diagonal, actuall 1468 2**bits_to_shift. */ 1469 min_diag_mask; /* Used to mask off everything above 1470 min_diag_length (mask = min_diag_length-1). */ 1471 Int4 offset; /* "offset" added to query and subject position 1472 so that "diag_level" and "last_hit" don't have 1473 to be zeroed out every time. */ 1474 Int4 window; /* The "window" size, within which two (or more) 1475 hits must be found in order to be extended. */ 1476 /* Used by BLAST_ExtendWordNew to decide whether or not 1477 to prepare the structure for multiple-hit type searches. 
1478 If TRUE, multiple hits are not neccessary, but possible. */ 1479 Boolean multiple_hits; 1480 } BLAST_ExtendWordParams, PNTR BLAST_ExtendWordParamsPtr; 1481 /* 1482 Data block to describe a single sequence. 1483 */ 1484 1485 typedef struct blast_sequence_block { 1486 Uint1Ptr sequence, /* Actual (perhaps transl.) sequence. */ 1487 sequence_start; /* Start of sequence, used if the sequence is preceded by a NULLB. Sequences 1488 starting with a NULLB are used by BlastWordExtend_L1. */ 1489 Int4 length, /* length of sequence. */ 1490 original_length,/* length before translation. */ 1491 effective_length;/* effective length, used only by query. */ 1492 Int2 frame; /* frame of the sequence. */ 1493 } BlastSequenceBlk, PNTR BlastSequenceBlkPtr; 1494 1495 1496 typedef struct _blast_seg { 1497 Int2 frame; 1498 Int4 offset; /* start of hsp */ 1499 Int4 length; /* length of hsp */ 1500 Int4 end; /* end of HSP */ 1501 Int4 offset_trim; /* start of trimmed hsp */ 1502 Int4 end_trim; /* end of trimmed HSP */ 1503 /* Where the gapped extension (with X-dropoff) started. */ 1504 Int4 gapped_start; 1505 } BLAST_Seg, PNTR BLAST_SegPtr; 1506 1507 #define BLAST_NUMBER_OF_ORDERING_METHODS 2 1508 1509 1510 /* 1511 The following structure is used in "link_hsps" to decide between 1512 two different "gapping" models. Here link is used to hook up 1513 a chain of HSP's (this is a VoidPtr as _blast_hsp is not yet 1514 defined), num is the number of links, and sum is the sum score. 1515 Once the best gapping model has been found, this information is 1516 transferred up to the BLAST_HSP. This structure should not be 1517 used outside of the function link_hsps. 1518 */ 1519 typedef struct _blast_hsp_link { 1520 /* Used to order the HSP's (i.e., hook-up w/o overlapping). */ 1521 VoidPtr link[BLAST_NUMBER_OF_ORDERING_METHODS]; 1522 /* number of HSP in the ordering. */ 1523 Int2 num[BLAST_NUMBER_OF_ORDERING_METHODS]; 1524 /* Sum-Score of HSP. 
*/ 1525 Int4 sum[BLAST_NUMBER_OF_ORDERING_METHODS]; 1526 /* Sum-Score of HSP, multiplied by the appropriate Lambda. */ 1527 Nlm_FloatHi xsum[BLAST_NUMBER_OF_ORDERING_METHODS]; 1528 Int4 changed; 1529 } BLAST_HSP_LINK, PNTR BLAST_HSP_LINKPtr; 1530 /* 1531 BLAST_NUMBER_OF_ORDERING_METHODS tells how many methods are used 1532 to "order" the HSP's. 1533 */ 1534 1535 typedef struct _blast_hsp { 1536 struct _blast_hsp PNTR next, /* the next HSP */ 1537 PNTR prev; /* the previous one. */ 1538 BLAST_HSP_LINK hsp_link; 1539 /* Is this HSp part of a linked set? */ 1540 Boolean linked_set; 1541 /* which method (max or no max for gaps) was used? */ 1542 Int2 ordering_method; 1543 /* how many HSP's make up this (sum) segment */ 1544 Int4 num; 1545 /* normalized score of a set of "linked" HSP's */ 1546 Nlm_FloatHi xsum; 1547 /* If TRUE this HSP starts a chain along the "link" pointer. */ 1548 Boolean start_of_chain; 1549 BLAST_Score score; 1550 Int4 num_ident; 1551 Nlm_FloatHi evalue; 1552 BLAST_Seg query, /* query sequence info. */ 1553 subject; /* subject sequence info. */ 1554 Int2 context; /* Context number of query */ 1555 GapXEditBlockPtr gap_info; /* ALL gapped alignment is here */ 1556 Int4 num_ref; 1557 Int4 linked_to; 1558 /*which method if any was used for compositional adjustment? 1559 relevant only for blastp*/ 1560 Int2 comp_adjustment_method; 1561 } BLAST_HSP, PNTR BLAST_HSPPtr; 1562 1563 /* The helper arrays contains the info used frequently in the inner for loops. -cfj 1564 * One array of helpers will be allocated for each thread. See comments preceding 1565 * link_hsps in blast.c for more info. 1566 */ 1567 1568 typedef struct link_help_struct{ 1569 BLAST_HSPPtr ptr; 1570 Int4 q_off_trim; 1571 Int4 s_off_trim; 1572 Int4 sum[BLAST_NUMBER_OF_ORDERING_METHODS]; 1573 Int4 maxsum1; 1574 Int4 next_larger; 1575 } LinkHelpStruct; 1576 1577 /* Orders information for HSP accesses. 
*/ 1578 typedef struct hsp_helper{ 1579 Int4 qoffset, 1580 qend; 1581 } BLAST_HSP_helper, PNTR BLAST_HSP_helperPtr; 1582 1583 1584 typedef struct _exact_match { 1585 Int4 q_off; 1586 Int4 s_off; 1587 } MegaBlastExactMatch, PNTR MegaBlastExactMatchPtr; 1588 1589 typedef struct _blast_hitlist { 1590 struct _blast_hitlist PNTR next; 1591 BLAST_HSPPtr PNTR hsp_array; /* head of linked list of HSPs */ 1592 Int4 hspmax, /* max no. of HSPs allowed per hit list */ 1593 hspcnt, /* no. of HSPs in hit list */ 1594 hspcnt_max; /* no. of HSPs in hitlist, before reaping */ 1595 Boolean further_process; /* This sequence has been found interesting, 1596 it should be further processed by a gapped 1597 alignment etc. */ 1598 Boolean do_not_reallocate; /* Don't reallocate the HSP's, probably because 1599 there is no more memory for this. */ 1600 /* added -cfj */ 1601 LinkHelpStruct *lh_helper; 1602 Int4 lh_helper_size; 1603 MegaBlastExactMatchPtr exact_match_array; /* Array to hold initial 1604 exact match hits */ 1605 Int4 exact_match_max; 1606 } BLAST_HitList, PNTR BLAST_HitListPtr; 1607 1608 /* 1609 The next two structures are the final output produced by BLAST. Formatters should then 1610 convert the data into SeqAligns or the BLAST ASN.1 spec. 1611 */ 1612 1613 typedef struct _blast_results_hsp { 1614 Int2 ordering_method;/* determines whether large or small gap was used. */ 1615 Int4 number; /* number of HSP's used to calculate the p-value. */ 1616 BLAST_Score score; /* score of this HSP. */ 1617 Nlm_FloatHi e_value,/* expect value of this set of HSP's. */ 1618 bit_score; /* above score * lambda/ln2 */ 1619 Int4 num_ident;/* number of identities in this HSP. */ 1620 Int2 context; /* context number of query. */ 1621 Int2 query_frame, /* frame of query, non-zero if transl. */ 1622 subject_frame; /* frame of subject, non-zero if transl. */ 1623 Int4 query_offset, /* Start of the query HSP. */ 1624 query_length, /* Length of the query HSP. 
*/ 1625 subject_offset, /* Start of the subject HSP. */ 1626 subject_length, /* Length of the subject HSP.*/ 1627 hspset_cnt; /* which set of HSP's? */ 1628 /* Starting points (on original HSP) for a gapped extension with X dropoff. */ 1629 Int4 query_gapped_start, 1630 subject_gapped_start; 1631 1632 GapXEditBlockPtr gap_info; /* ALL gapped alignment is here */ 1633 struct _blast_result_hitlist PNTR point_back; 1634 struct _blast_heap_struct PNTR back_left, PNTR back_right; 1635 } BLASTResultHsp, PNTR BLASTResultHspPtr; 1636 1637 /* 1638 The following structure contains the subject info, if the readdb 1639 facility is not being used. Then the subject information is 1640 kept here. Otherwise this structure is NULL. 1641 */ 1642 typedef struct _blast_subject_info { 1643 SeqIdPtr sip; /* ID of the subject. */ 1644 CharPtr defline; /* Defline of the subject. */ 1645 Int4 length; /* untranslated length of the database sequence. */ 1646 } BLASTSubjectInfo, PNTR BLASTSubjectInfoPtr; 1647 1648 /* Saved results for one subject sequence: the HSP array plus per-subject summary fields. */ typedef struct _blast_result_hitlist { 1649 BLASTResultHspPtr hsp_array; /* An array holding the HSP's. */ 1650 Nlm_FloatHi best_evalue; /* best evalue in all the HSP's. */ 1651 Int4 high_score; /* HSP with highest score. */ 1652 Int4 hspcnt, /* Number of HSP's. */ 1653 subject_id; /* ID of the subject. */ 1654 Int2 db_id; /* ID (0,1,2...) of the db if multiple db's searched. */ 1655 Int4 subject_length; /* length of the database sequence. */ 1656 BLASTSubjectInfoPtr subject_info; /* Subject info if the readdb facility is not being used. */ 1657 SeqAlignPtr seqalign; /* alignment, if this is a gapped calculation. */ 1658 Int4 num_ref; /* NOTE(review): undocumented; presumably a reference count -- confirm. */ 1659 } BLASTResultHitlist, PNTR BLASTResultHitlistPtr; 1660 1661 1662 /* Node with next/prev links that carries the heap pointer (to BLASTResultHspPtr entries) and the count of entries in it. */ typedef struct _blast_heap_struct { 1663 Int4 cutvalue; /* start of a region?
*/ 1664 BLASTResultHspPtr PNTR heap; 1665 Int4 num_in_heap; /* Number in 'heap' */ 1666 Int4 num_of_ref; /* NOTE(review): undocumented; presumably a reference count -- confirm. */ 1667 struct _blast_heap_struct PNTR next, PNTR prev; 1668 } BLASTHeapStruct, PNTR BLASTHeapPtr; 1669 1670 /* 1671 Holds the results already saved. 1672 */ 1673 1674 typedef struct _blast_results_struct { 1675 1676 BLASTResultHitlistPtr PNTR results; 1677 Int4 hitlist_count, /* Number of hitlists saved on results array already. */ 1678 hitlist_max, /* Length of results array. */ 1679 max_pieces; /* For range-dependent limits. */ 1680 /* NOTE(review): heap, num_in_heap and heap_ptr are undocumented; presumably they back the range-dependent limits (see max_pieces) -- confirm. */ BLASTResultHspPtr **heap; 1681 Int4 *num_in_heap; 1682 BLASTHeapPtr heap_ptr; 1683 } BLASTResultsStruct, PNTR BLASTResultsStructPtr; 1684 1685 /* 1686 Holds the data for all possible words that might be used by BLAST. 1687 */ 1688 1689 typedef struct _blast_all_words { 1690 Uint1Ptr *array, /* All the possible words */ 1691 array_storage; /* Storage for the words in array. */ 1692 Int4 num_of_cols, 1693 wordsize; /* length of each row (see rows_allocated). */ 1694 Boolean rows_allocated, /* are the rows (of length the wordsize) alloc.*/ 1695 specific; /* specific (limited) words are to be indexed. */ 1696 } BlastAllWord, *BlastAllWordPtr; 1697 1698 /* 1699 Contains gi and ordinal number for use by random access BLAST. Despite the tag name there are three Int4 members; the type is also reused for BlastHitRange.range_list. 1700 */ 1701 typedef struct _double_int4 { 1702 Int4 gi, 1703 ordinal_id, 1704 start; 1705 } BlastDoubleInt4, *BlastDoubleInt4Ptr; 1706 1707 1708 /* List of gi entries (BlastDoubleInt4) with a cursor, a total count and an ownership flag. */ typedef struct _blast_gi_list { 1709 BlastDoubleInt4Ptr gi_list; /* List of gi's. */ 1710 BlastDoubleInt4Ptr *gi_list_pointer; /* Pointer to above list. */ 1711 Int4 current; /* Current position in gi list. */ 1712 Int4 total; /* total number of gi's. */ 1713 Boolean gilist_not_owned; /* do not delete gilist at end. */ 1714 } BlastGiList, *BlastGiListPtr; 1715 1716 /* 1717 Used for keeping start and stop of hits to query, for ALU filtering. 1718 */ 1719 typedef struct _blast_hit_range { 1720 BlastDoubleInt4Ptr range_list; /* ranges. */ 1721 BlastDoubleInt4Ptr *range_list_pointer; /* Pointer to above list.
*/ 1722 Int4 current, /* current position in list. */ 1723 total; /* total number in list. */ 1724 SeqIdPtr query_id; /* ID to be put on SeqLoc's that are produced. */ 1725 Int4 base_offset; /* used if a SeqLoc is searched and it does not start at the beginning 1726 of the sequence. */ 1727 } BlastHitRange, *BlastHitRangePtr; 1728 1729 /* 1730 Contains BLAST error messages. 1731 */ 1732 1733 typedef struct _blast_error_msg { 1734 Uint2 level;/* corresponds to levels of ErrPostEx [none(0), info(1), warn(2), error(3) and fatal(4)] */ 1735 CharPtr msg; /* the message text. */ 1736 } BlastErrorMsg, *BlastErrorMsgPtr; 1737 1738 /* 1739 Holds data for each "context" (which is generally equal to 1740 one frame of the query). blastx would have six contexts, 1741 blastp would have one. 1742 */ 1743 1744 typedef struct _blast_context_structure { 1745 Boolean query_allocated;/* The BlastSequenceBlkPtr IS allocated. */ 1746 BlastSequenceBlkPtr query; /* query sequence. */ 1747 BLAST_ExtendWordPtr ewp;/* keep track of diagonal etc. for each frame */ 1748 ValNodePtr location; /* Where to start/stop masking. */ 1749 } BLASTContextStruct, PNTR BLASTContextStructPtr; 1750 1751 /* Structure used for full Smith-Waterman results. */ 1752 1753 typedef struct SWResults { 1754 Uint1Ptr seq; 1755 Int4 seqStart; 1756 Int4 seqEnd; 1757 Int4 queryStart; 1758 Int4 queryEnd; 1759 Int4 *reverseAlignScript; 1760 BLAST_Score score; 1761 BLAST_Score scoreThisAlign; 1762 Nlm_FloatHi eValue; 1763 Nlm_FloatHi eValueThisAlign; 1764 Nlm_FloatHi Lambda; 1765 Nlm_FloatHi logK; 1766 SeqIdPtr subject_id; /*used to display the sequence in alignment*/ 1767 struct SWResults *next; /* next SWResults in the list. */ 1768 Boolean isFirstAlignment; 1769 Int4 subject_index; /* needed to break ties on rare occasions */ 1770 SeqAlignPtr seqAlign; /*needed when seqAlign is already computed*/ 1771 } SWResults; 1772 1773 /* Average sizes of protein and nucl. sequences.
*/ 1774 #define BLAST_AA_AVGLEN 300 1775 #define BLAST_NT_AVGLEN 1000 1776 1777 /* How many ticks should be emitted total. */ 1778 #define BLAST_NTICKS 50 1779 1780 /* period of sending out a star/message. */ 1781 #define STAR_MSG_PERIOD 60 1782 1783 /* Threading state for a search: mutexes, database-chunk bookkeeping, progress threads and progress callbacks. */ typedef struct _BlastThrInfo { 1784 1785 TNlmMutex db_mutex; /*lock for access to database*/ 1786 TNlmMutex results_mutex; /*lock for storing results */ 1787 TNlmMutex callback_mutex; /*lock for issuing update ticks on the screen*/ 1788 /* Mutex for recalculation of ambiguities, in BlastReevaluateWithAmbiguities */ 1789 TNlmMutex ambiguities_mutex; 1790 1791 /* 1792 GI List to be used if database will be searched by GI. 1793 gi_current is the current element in the array being worked on. 1794 global_gi_being_used specifies that it will be used (NOTE(review): no member of that name is declared in this struct). 1795 */ 1796 Int4 gi_current; 1797 BlastGiListPtr blast_gi_list; 1798 1799 /* Number of database sequences for each thread to process. */ 1800 Int4 db_chunk_size; 1801 1802 /* The last db sequence to be assigned. Used only in get_db_chunk after 1803 the acquisition of the "db_mutex" (above). */ 1804 Int4 db_chunk_last; 1805 1806 /* the last sequence in the database to be compared against. */ 1807 Int4 final_db_seq; 1808 Int4 number_seqs_done; /*number of sequences already tested*/ 1809 Int4 db_incr; /*size of a database chunk to get*/ 1810 Int4 last_db_seq; 1811 1812 /* How many positive hits were found (set by ReapHitlist, read by tick_proc 1813 and star_proc). */ 1814 Int4 number_of_pos_hits; 1815 1816 /* Used by star_proc to determine whether to emit a star. */ 1817 time_t last_tick; 1818 1819 /* tells star_proc to check that a star should be emitted. */ 1820 TNlmThread awake_thr; 1821 Boolean awake; 1822 1823 /* tells index_proc to check that a message should be emitted. */ 1824 TNlmThread index_thr; 1825 Boolean awake_index; 1826 1827 /* 1828 Callback functions to indicate progress, or lack thereof.
1829 */ 1830 int (LIBCALLBACK *tick_callback)PROTO((Int4 done, Int4 positives)); 1831 int (LIBCALLBACK *star_callback)PROTO((Int4 done, Int4 positives)); 1832 int (LIBCALLBACK *index_callback)PROTO((Int4 done, Int4 positives)); 1833 1834 /* whether real databases are done */ 1835 Boolean realdb_done; 1836 1837 } BlastThrInfo, PNTR BlastThrInfoPtr; 1838 1839 /* 1840 Structure used for matrix rescaling. 1841 */ 1842 1843 typedef struct _blast_matrix_rescale { 1844 Int4 alphabet_size, 1845 query_length; /* length of query. */ 1846 Uint1Ptr query; 1847 Nlm_FloatHi *standardProb; 1848 Int4Ptr *matrix; 1849 Int4Ptr *private_matrix; 1850 BLAST_KarlinBlkPtr *kbp_std, 1851 *kbp_psi, 1852 *kbp_gap_std, 1853 *kbp_gap_psi; 1854 Nlm_FloatHi lambda_ideal, 1855 K_ideal; 1856 } BlastMatrixRescale, *BlastMatrixRescalePtr; 1857 1858 1859 /* 1860 The central structure for the BLAST search. This structure 1861 should contain data (or pointers to data) for all the 1862 information in a BLAST search. 1863 */ 1864 1865 1866 #define BLAST_SEARCH_ALLOC_QUERY 1 1867 #define BLAST_SEARCH_ALLOC_SUBJECT 2 1868 #define BLAST_SEARCH_ALLOC_PBP 4 1869 #define BLAST_SEARCH_ALLOC_SBP 8 1870 #define BLAST_SEARCH_ALLOC_WFP_FIRST 16 1871 #define BLAST_SEARCH_ALLOC_WFP_SECOND 32 1872 #define BLAST_SEARCH_ALLOC_EWPPARAMS 64 1873 #define BLAST_SEARCH_ALLOC_CONTEXT 128 1874 #define BLAST_SEARCH_ALLOC_RESULTS 256 1875 #define BLAST_SEARCH_ALLOC_READDB 512 1876 #define BLAST_SEARCH_ALLOC_TRANS_INFO 1024 1877 #define BLAST_SEARCH_ALLOC_ALL_WORDS 2048 1878 #define BLAST_SEARCH_ALLOC_QUERY_SLP 4096 1879 #define BLAST_SEARCH_ALLOC_THRINFO 8192 1880 #define BLAST_SEARCH_ALLOC_MASK1 16384 1881 1882 typedef struct blast_search_block { 1883 Int4 allocated; 1884 /* bit fields specify which structures from below are allocated. If 1885 a field is allocated, then its bit is non-zero.
1886 1887 structure bit-field (define) 1888 ----------------------------------------- 1889 query BLAST_SEARCH_ALLOC_QUERY 1890 subject BLAST_SEARCH_ALLOC_SUBJECT 1891 pbp BLAST_SEARCH_ALLOC_PBP 1892 sbp BLAST_SEARCH_ALLOC_SBP 1893 wfp_first BLAST_SEARCH_ALLOC_WFP_FIRST 1894 wfp_second BLAST_SEARCH_ALLOC_WFP_SECOND 1895 ewp_params BLAST_SEARCH_ALLOC_EWPPARAMS 1896 context BLAST_SEARCH_ALLOC_CONTEXT 1897 result_struct BLAST_SEARCH_ALLOC_RESULTS 1898 rdfp BLAST_SEARCH_ALLOC_READDB 1899 translation_table BLAST_SEARCH_ALLOC_TRANS_INFO 1900 translation_table_rc 1901 all_words BLAST_SEARCH_ALLOC_ALL_WORDS 1902 query_slp BLAST_SEARCH_ALLOC_QUERY_SLP 1903 mask1 BLAST_SEARCH_ALLOC_MASK1 (NOTE(review): BLAST_SEARCH_ALLOC_THRINFO is also defined but has no row in this table; presumably it covers thr_info -- confirm.) 1904 */ 1905 1906 /* 1907 Specifies whether the search is position based or not. 1908 */ 1909 Boolean positionBased; 1910 Boolean posConverged; 1911 /* 1912 Specifies that the query sequence was invalid (e.g., XXXXXXXXXXXXXXXXXXXXXX). 1913 */ 1914 Boolean query_invalid; 1915 /* Specifies that the search timed out (i.e., cpu time limit was reached). */ 1916 Boolean timed_out; 1917 /* 1918 The BLASTContextStructPtr is an array and each element contains 1919 information about the query sequence and the frame number. 1920 If there are six frames (e.g., blastx) then the BLASTContextStructPtr 1921 is six elements long; if there's one frame (e.g., blastp) then 1922 BLASTContextStructPtr is one element long. 1923 1924 number_of_contexts states how long the context array is. (NOTE(review): no number_of_contexts member is declared in this struct; first_context and last_context below presumably delimit the contexts in use -- confirm.) 1925 */ 1926 BLASTContextStructPtr context; 1927 Int2 first_context, 1928 last_context; 1929 /* 1930 The GapAlignBlkPtr used by ALIGN (in gapxdrop.c) for gapped alignments. 1931 */ 1932 1933 GapAlignBlkPtr gap_align; 1934 1935 /* 1936 All the possible words. 1937 */ 1938 BlastAllWordPtr all_words; 1939 /* 1940 Set the context_factor, which specifies how many different 1941 ways the query or db is examined (e.g., blastn looks at both 1942 strands of query, context_factor is 2).
1943 */ 1944 Int2 context_factor; 1945 1946 /* 1947 What type of search (e.g., blastp, blastx, etc.)? 1948 */ 1949 CharPtr prog_name; 1950 Uint1 prog_number; 1951 /* 1952 translation_table and translation_table_rc hold the translation 1953 from ncbi2na to ncbistdaa for normal and reverse-complement 1954 translations. Only used and initialized with tblast[nx]. 1955 Initialized by GetPrivatTranslationTable 1956 */ 1957 Uint1Ptr translation_table, 1958 translation_table_rc; 1959 1960 /* 1961 ValNodePtr containing error messages. 1962 */ 1963 ValNodePtr error_return; 1964 1965 /* 1966 ValNodePtr containing masking SeqLocPtr's 1967 */ 1968 ValNodePtr mask; 1969 ValNodePtr mask1; 1970 /* 1971 What genetic codes are we using to translate the query or database 1972 when needed. Based upon NCBI genetic codes. 1973 */ 1974 CharPtr genetic_code, /* genetic code used for query. */ 1975 db_genetic_code; /* genetic code used for database. */ 1976 1977 /* 1978 The BlastSequenceBlk "subject" holds info about the subject. 1979 Info about the original sequence is in original_seq. This will 1980 be NULL if the sequence was not translated. 1981 */ 1982 Uint1Ptr translation_buffer; /* Buffer for (tblast[nx]) db translations*/ 1983 Int4 translation_buffer_size; /* size of translation_buffer. */ 1984 CharPtr original_seq; /* Original (i.e., untransl.) sequence. */ 1985 BlastSequenceBlkPtr subject;/* subject sequence. */ 1986 1987 1988 /* KM-- info about individual queries from a concatenated query in 1989 blastn or tblastn */ 1990 struct queries PNTR mult_queries; /* struct defined in blastconcat.h */ 1991 1992 1993 /* 1994 SeqLocPtr for the query, owned by the caller and not by BLAST. 1995 */ 1996 SeqLocPtr query_slp; 1997 1998 /* Id's for the query and subject. */ 1999 SeqIdPtr query_id; /* ID for the query, any form. */ 2000 Int4 subject_id; /* the number of the subject, in the DB. */ 2001 BLAST_ParameterBlkPtr pbp; /* options selected.
*/ 2002 BLAST_ScoreBlkPtr sbp; /* info on scoring. */ 2003 BLAST_ExtendWordParamsPtr ewp_params; /* parameters for extensions.*/ 2004 2005 /* For the two-pass method two BLAST_WordFinderPtr's are required. 2006 The actual wfp's are in wfp_first and wfp_second. "wfp" is just 2007 a pointer to one of those two. Whether they have been allocated (at all) 2008 is signified by the bit-fields above. 2009 */ 2010 BLAST_WordFinderPtr wfp, /* find initial words. */ 2011 wfp_first, /* words for first pass. */ 2012 wfp_second;/* words for second pass. */ 2013 /* For the two-pass this should be set to TRUE on the first (preliminary) 2014 pass and FALSE on the second pass. 2015 */ 2016 Boolean prelim; 2017 /* 2018 The "current" hit, that is the one being worked on right now. 2019 If a hitlist is deemed significant, then "current_hitlist" is 2020 moved to "seqalign". current_hitlist_purge specifies 2021 whether the hitlist should be purged after each call to a 2022 WordFinder; it will generally be purged except for non-initial 2023 frames of tblast[nx]. 2024 */ 2025 Boolean current_hitlist_purge; 2026 BLAST_HitListPtr current_hitlist; 2027 2028 BlastSequenceBlkPtr PNTR query_dnap; /* query DNAP sequence. */ 2029 2030 /* 2031 The worst evalue seen by this thread so far. 2032 Only filled in if the hitlist is already full, otherwise 2033 it should be DBL_MAX. 2034 */ 2035 Nlm_FloatHi worst_evalue; 2036 /* 2037 Size of the HSP array on the "current_hitlist" 2038 */ 2039 Int4 hsp_array_size; 2040 /* 2041 Contains hits that are significant. 2042 */ 2043 Int4 result_size; 2044 BLASTResultsStructPtr result_struct; 2045 2046 Int8 dblen; /* total length of the database. */ 2047 Int8 dblen_eff; /* effective length of the database. */ 2048 Int8 dblen_eff_real; /* effective length of the database. NOTE(review): same description as dblen_eff; how the two differ is not stated here. */ 2049 Int4 dbseq_num; /* number of sequences in the database. */ 2050 Int4 length_adjustment; /* amount removed from end of query and db sequences.
*/ 2051 Nlm_FloatHi searchsp_eff; /* Effective search space (used for statistics). */ 2052 Int4 rps_qlen; /* original query sequence length (RPS-BLAST only) */ 2053 ReadDBFILEPtr rdfp, /* I/O PTR for database files. */ 2054 rdfp_list; /* linked rdfp list of all databases. */ 2055 /* The subject info (id and defline) is kept here for the current sequence 2056 if the readdb facility is not used. This structure should only 2057 be used if rdfp is NULL. 2058 */ 2059 BLASTSubjectInfoPtr subject_info; 2060 2061 /* Data used in threads - previously global variables */ 2062 2063 BlastThrInfoPtr thr_info; 2064 2065 /* 2066 start and stop of query that must be included for an alignment 2067 to be counted. The Boolean whole_query specifies whether these 2068 are valid (i.e., have been set) or not. 2069 */ 2070 Boolean whole_query; 2071 Int4 required_start, required_end; 2072 2073 /* 2074 Callback functions to indicate progress, or lack thereof. tick_callback and star_callback are declared in BlastThrInfo (reachable through thr_info); the former declarations are kept commented out below. 2075 */ 2076 /* int (LIBCALLBACK *tick_callback)PROTO((Int4 done, Int4 positives)); */ 2077 /* int (LIBCALLBACK *star_callback)PROTO((Int4 done, Int4 positives)); */ 2078 /* 2079 Callback function to handle results (e.g., print them out for neighboring) 2080 in place of BlastSaveCurrentHitlist. 2081 */ 2082 int (LIBCALLBACK *handle_results)PROTO((VoidPtr search)); 2083 /* 2084 Output stream to put results to 2085 */ 2086 VoidPtr output; 2087 /* 2088 These "counters" keep track of how often certain operations 2089 were performed. 2090 2091 This counting is performed only if BLAST_COLLECT_STATS is defined. 2092 */ 2093 Int8 first_pass_hits, /* no. of hits on 1st pass. */ 2094 second_pass_hits, /* no. of hits on 2nd pass. */ 2095 second_pass_trys, /* no. of seqs that made it to 2nd pass. */ 2096 first_pass_extends, /* no. extended on 1st pass. */ 2097 second_pass_extends, /* no. extended on 2nd pass. */ 2098 first_pass_good_extends,/* no. successfully extended on 1st pass. */ 2099 second_pass_good_extends,/* no.
successfully extended on 2nd pass. */ 2100 number_of_seqs_better_E,/* how many sequences were better than E. */ 2101 prelim_gap_no_contest, /* No. of HSP's under E=10 alone. */ 2102 prelim_gap_passed, /* No. of HSP's that passed prelim gapping. */ 2103 prelim_gap_attempts, /* No. of HSP's we attempted to gap. */ 2104 real_gap_number_of_hsps, /* How many HSP's were gapped in BlastGetGappedScore. */ 2105 semid; /* ID of the load-balance semaphore is stored here (declared in the Int8 counter list above, although it is an ID rather than a counter). */ 2106 GreedyAlignMemPtr abmp; /* Memory for megablast greedy extension */ 2107 Int4 PNTR query_context_offsets; /* offsets for all queries and strands in a 2108 concatenated sequence */ 2109 SeqIdPtr PNTR qid_array; /* Ids of all queries in Mega BLAST search */ 2110 BLASTResultsStructPtr PNTR mb_result_struct; /* one result struct per query 2111 for Mega BLAST */ 2112 ValNodePtr mb_endpoint_results; /* Points to linked list of results */ 2113 } BlastSearchBlk, PNTR BlastSearchBlkPtr; 2114 2115 /* Linked-list node (via next) holding two Int4 start/end pairs: q_start/q_end and s_start/s_end, presumably query and subject coordinates of one HSP segment -- confirm. */ typedef struct _blast_hsp_segment { 2116 Int4 q_start, q_end; 2117 Int4 s_start, s_end; 2118 struct _blast_hsp_segment PNTR next; 2119 } BLASTHSPSegment, PNTR BLASTHSPSegmentPtr; 2120 2121 #ifdef __cplusplus 2122 } 2123 #endif 2124 #endif /* !__BLASTSTR__ */ 2125