1 /* ===========================================================================
2 *
3 *                            PUBLIC DOMAIN NOTICE
4 *               National Center for Biotechnology Information
5 *
6 *  This software/database is a "United States Government Work" under the
7 *  terms of the United States Copyright Act.  It was written as part of
8 *  the author's official duties as a United States Government employee and
9 *  thus cannot be copyrighted.  This software/database is freely available
10 *  to the public for use. The National Library of Medicine and the U.S.
11 *  Government have not placed any restriction on its use or reproduction.
12 *
13 *  Although all reasonable efforts have been taken to ensure the accuracy
14 *  and reliability of the software and data, the NLM and the U.S.
15 *  Government do not and cannot warrant the performance or results that
16 *  may be obtained by using this software or data. The NLM and the U.S.
17 *  Government disclaim all warranties, express or implied, including
18 *  warranties of performance, merchantability or fitness for any particular
19 *  purpose.
20 *
21 *  Please cite the author in any work or product based on this material.
22 *
23 * ===========================================================================*/
24 /*****************************************************************************
25 
26 File name: blastdef.h
27 
28 Author: Tom Madden
29 
30 Contents: #defines and definitions for structures used by BLAST.
31 
32 ******************************************************************************/
33 /* $Revision: 6.169 $
34 * $Log: blastdef.h,v $
35 * Revision 6.169  2007/03/13 20:39:58  madden
36 *  - Change the type of the dropoff_1st_pass, dropoff_2nd_pass,
37 *    gap_x_dropoff, and gap_x_dropoff_final fields of the
38 *    BLAST_OptionsBlk struct to Nlm_FloatHi.
39 *  [from Mike Gertz]
40 *
41 * Revision 6.168  2006/05/03 14:41:50  madden
42 * Added a Boolean field "unified_p" to the BLAST_OptionsBlk
43 * structure.  The field indicates whether to use a combination of
44 * alignment and compositional p-values when evaluating significance;
45 * the field is ignored unless composition-based statistics is on.
46 * (from Mike Gertz).
47 *
48 * Revision 6.167  2006/03/21 22:35:27  camacho
49 * Add support for setting database length in BLAST_WizardOptions{Blk,Mask}
50 *
51 * Revision 6.166  2006/01/24 18:38:15  papadopo
52 * from Mike Gertz: Remove #define'd constants that specify the composition adjustment mode.  These have been replaced by an enum in the composition adjustment library
53 *
54 * Revision 6.165  2005/08/31 20:32:47  coulouri
55 * From Mike Gertz:
56 *    - Added the kappa_expect_value field to the
57 *      BLAST_OptionsBlk datatype.  This new field holds the cutoff value
58 *      used by RedoAlignmentCore; it should equal expect_value if
59 *      RedoAlignmentCore will not be called.
60 *    - removed the now unused original_expect_value field of the
61 *      BLAST_OptionsBlk datatype.
62 *
63 * Revision 6.164  2005/07/27 15:51:54  coulouri
64 * remove unused queue_callback
65 *
66 * Revision 6.163  2005/05/16 17:43:29  papadopo
67 * From Alejandro Schaffer: Added support for compositional score
68 * matrix adjustment
69 *
70 * Revision 6.162  2005/04/25 14:16:36  coulouri
71 * set db_chunk_size adaptively
72 *
73 * Revision 6.161  2005/01/10 18:52:29  coulouri
74 * fixes from morgulis to allow concatenation of >255 queries in [t]blastn
75 *
76 * Revision 6.160  2004/11/19 13:22:05  madden
77 * Remove no_check_score completely (from Mike Gertz)
78 *
79 * Revision 6.159  2004/09/28 16:02:29  papadopo
80 * From Michael Gertz: Changed the "sumscore" field of an HSP to
81 * "xsum" to represent a normalized sum score of linked HSPs;
82 * the normalized score is more appropriate/useful in doing linking.
83 *
84 * Revision 6.158  2004/06/30 12:29:00  madden
85 * Removed typedef for BlastPruneSapStruct and some defines, moved to blfmtutl.h
86 *
87 * Revision 6.157  2004/04/30 12:45:45  coulouri
88 * bump version to 2.2.9
89 *
90 * Revision 6.156  2004/04/13 21:02:52  madden
91 * Add ignore_gilist Boolean to Options for use in formatting
92 *
93 * Revision 6.155  2004/02/04 15:35:03  camacho
94 * Rollback to fix problems in release 2.2.7
95 *
96 * Revision 6.154  2004/01/27 20:46:06  dondosha
97 * Allow values 0, 1, 2 for no_traceback megablast option
98 *
99 * Revision 6.153  2004/01/05 22:09:26  madden
100 * Put back dashes in date
101 *
102 * Revision 6.152  2004/01/02 13:44:32  coulouri
103 * Revert to hardcoded BLAST_RELEASE_DATE
104 *
105 * Revision 6.151  2003/12/29 15:51:18  coulouri
106 * Bump version, use __DATE__ instead of hardcoded date
107 *
108 * Revision 6.150  2003/11/06 19:52:13  dondosha
109 * Added error MBTemplateType, so it can be returned when wordsize/template length combination is wrong
110 *
111 * Revision 6.149  2003/10/02 19:30:11  madden
112 * add field seAlign to SWResults for use in kappa.c
113 *
114 * Revision 6.148  2003/04/09 14:18:34  madden
115 * Update version and release-date
116 *
117 * Revision 6.147  2003/03/26 15:45:48  boemker
118 * Documented relationships among BLAST_OptionsBlk, BLAST_WizardOptionsBlk,
119 * and BLAST_WizardOptionsMask.
120 *
121 * Revision 6.146  2003/03/25 22:23:06  boemker
122 * Replaced cutoff_s2, which isn't used, with cutoff_s, which is.
123 * Added query_lcase_mask.
124 *
125 * Revision 6.145  2003/03/25 19:58:18  boemker
126 * Moved code to initialize search options from blastcgicmd.cpp to here, as
127 * BLAST_Wizard et al.
128 *
129 * Revision 6.144  2003/03/24 19:42:14  madden
130 * Changes to support query concatenation for blastn and tblastn
131 *
132 * Revision 6.143  2002/11/22 23:28:43  dondosha
133 * Use array of structures instead of array of pointers for initial offset pairs
134 *
135 * Revision 6.142  2002/11/16 17:12:55  madden
136 * Change version and date
137 *
138 * Revision 6.141  2002/11/04 22:51:13  dondosha
139 * Changed FloatHi pvalue to Int4 num_ident in HSP structures
140 *
141 * Revision 6.140  2002/09/13 19:11:02  camacho
142 * Added rps_qlen field
143 *
144 * Revision 6.139  2002/09/11 21:15:23  camacho
145 * Removed obsolete #define and comment about BlastSeqIdList structure
146 *
147 * Revision 6.138  2002/09/11 20:46:25  camacho
148 * Removed deprecated BlastSeqIdListPtr code
149 *
150 * Revision 6.137  2002/08/26 15:49:51  madden
151 * Change release date and version
152 *
153 * Revision 6.136  2002/08/09 19:39:20  camacho
154 * Added constants for some blast search parameters
155 *
156 * Revision 6.135  2002/06/21 21:43:01  camacho
157 * Removed obsolete BlastSeqIdList structure and functions
158 *
159 * Revision 6.134  2002/05/17 21:40:13  dondosha
160 * Added 2 optimal Mega BLAST word templates for length 21
161 *
162 * Revision 6.133  2002/05/14 22:20:20  dondosha
163 * Renamed maximal discontiguous template type into optimal
164 *
165 * Revision 6.132  2002/04/23 20:59:53  madden
166 * Change version and date for release
167 *
168 * Revision 6.131  2002/04/09 18:16:43  dondosha
169 * Added more options/parameters for megablast
170 *
171 * Revision 6.130  2002/03/28 18:53:18  madden
172 * Add ValNodePtr mask1 to BlastSearch structure
173 *
174 * Revision 6.129  2001/12/28 20:38:39  dondosha
175 * Moved Mega BLAST related parameters into a separate structure
176 *
177 * Revision 6.128  2001/12/28 18:01:26  dondosha
178 * Added field scoreThisAlign to SWResults to allow more tie-breaking options
179 *
180 * Revision 6.127  2001/12/14 22:05:40  madden
181 * Changed version and release date
182 *
183 * Revision 6.126  2001/09/11 14:28:31  madden
184 * Added timed_out Boolean to SearchBlk
185 *
186 * Revision 6.125  2001/09/07 14:46:44  dondosha
187 * Roll back removal of threshold_first from functions and structures
188 *
189 * Revision 6.124  2001/09/06 20:24:34  dondosha
190 * Removed threshold_first
191 *
192 * Revision 6.123  2001/08/06 12:50:51  madden
193 * Change release date
194 *
195 * Revision 6.122  2001/07/12 19:50:24  madden
196 * Changed release date
197 *
198 * Revision 6.121  2001/06/28 13:42:09  madden
199 * Fixes to prevent overflow on number of hits reporting
200 *
201 * Revision 6.120  2001/06/12 19:48:56  madden
202 * Introduce total_hsp_limit, check before making SeqAlign
203 *
204 * Revision 6.119  2001/04/13 20:56:08  madden
205 * Updated version to 2.2.1, changed date
206 *
207 * Revision 6.118  2001/04/11 20:56:21  madden
208 * Added scalingFactor for rpsblast, changed release date
209 *
210 * Revision 6.117  2001/03/30 21:58:18  madden
211 * Change release date and version
212 *
213 * Revision 6.116  2001/03/27 21:27:01  madden
214 * Minor efficiency in how lookup table is made
215 *
216 * Revision 6.115  2001/03/19 18:52:57  madden
217 * Add base_offset element to structure for BlastHitRange
218 *
219 * Revision 6.114  2001/02/07 21:05:33  dondosha
220 * Added an output stream to BlastOptionsBlk
221 *
222 * Revision 6.113  2000/12/21 22:28:17  dondosha
223 * Added option and parameter for percent identity cutoff
224 *
225 * Revision 6.112  2000/11/29 16:17:56  dondosha
226 * Added a definition of small structure BLASTHSPSegment
227 *
228 * Revision 6.111  2000/11/14 18:14:00  madden
229 * release date to Nov-13-2000
230 *
231 * Revision 6.110  2000/11/08 22:18:05  dondosha
232 * Added longest_intron integer option and parameter
233 *
234 * Revision 6.109  2000/11/07 16:30:25  madden
235 * Introduce intermediate score (before linking of HSPs) for blastx and tblastn
236 *
237 * Revision 6.108  2000/11/03 20:16:24  dondosha
238 * Changed one_line_results option and parameter to more meaningful no_traceback
239 *
240 * Revision 6.107  2000/11/01 16:25:56  madden
241 * Changes from Futamura for psitblastn
242 *
243 * Revision 6.106  2000/10/18 19:53:19  shavirin
244 * Empty log message.
245 *
246 * Revision 6.105  2000/10/18 19:17:56  shavirin
247 * Changed BLAST_ENGINE_VERSION and BLAST_RELEASE_DATE
248 *
249 * Revision 6.104  2000/10/05 19:50:49  dondosha
250 * Added mb_result_struct to the BlastSearchBlk to be used instead of result_struct in Mega BLAST
251 *
252 * Revision 6.103  2000/09/28 14:48:20  dondosha
253 * Added exact_match_array to hitlist structure for megablast initial hits
254 *
255 * Revision 6.102  2000/09/21 19:16:30  madden
256 * increase AWAKE_THR_MIN_SIZE by 100
257 *
258 * Revision 6.101  2000/08/29 19:35:49  madden
259 * Add gilist_not_owned to blast_gi_list
260 *
261 * Revision 6.100  2000/08/08 20:37:21  madden
262 * increase version number to 2.1.1 and release date
263 *
264 * Revision 6.99  2000/07/17 14:05:22  shavirin
265 * Added parameter Out-Of-Frame shift penalty and query DNAP sequence
266 *
267 * Revision 6.98  2000/07/11 18:38:02  madden
268 * decreased size of helper array, added prefetch to BlastGappedScoreInternal
269 *
270 * Revision 6.97  2000/07/11 17:16:20  shavirin
271 * Added new parameter is_ooframe for Out-Of-Frame gapping algorithm.
272 *
273 * Revision 6.96  2000/07/10 15:41:28  madden
274 * Add typedef for BLAST_HSP_helper
275 *
276 * Revision 6.95  2000/07/07 21:20:07  vakatov
277 * Get all "#include" out of the 'extern "C" { }' scope!
278 *
279 * Revision 6.94  2000/07/06 17:24:55  dondosha
280 * Added option and parameter megablast_full_deflines
281 *
282 * Revision 6.93  2000/06/30 17:52:45  madden
283 * Move AWAKE_THR_MIN_SIZE to blastdef.h
284 *
285 * Revision 6.92  2000/06/29 20:30:03  madden
286 * Update version and date
287 *
288 * Revision 6.91  2000/06/08 20:34:18  madden
289 * add explode_seqids option to show all ids in a defline
290 *
291 * Revision 6.90  2000/05/26 20:04:57  madden
292 * Raise version and date
293 *
294 * Revision 6.89  2000/05/12 19:40:59  dondosha
295 * Added qid_array element to BlastSearchBlk
296 *
297 * Revision 6.88  2000/05/01 19:04:31  shavirin
298 * Changed parameter level in BlastErrorMsg structure from Uint1 to Uint2.
299 *
300 * Revision 6.87  2000/04/21 20:48:05  madden
301 * Change version and date
302 *
303 * Revision 6.86  2000/04/06 14:47:10  madden
304 * Added original_expect_value
305 *
306 * Revision 6.85  2000/04/03 21:20:03  dondosha
307 * Added option and parameter is_neighboring
308 *
309 * Revision 6.84  2000/03/31 19:10:44  dondosha
310 * Changed some names related to MegaBlast
311 *
312 * Revision 6.83  2000/03/13 21:01:24  dondosha
313 * Added boolean option sort_gi_list to options block structure
314 *
315 * Revision 6.82  2000/02/29 18:17:23  shavirin
316 * Variable query_dna_mask changed to query_lcase_mask.
317 *
318 * Revision 6.81  2000/02/18 15:30:36  shavirin
319 * Added parameter query_dna_mask into options and parameters.
320 *
321 * Revision 6.80  2000/02/17 21:23:09  shavirin
322 * Added parameter is_rps_blast.
323 *
324 * Revision 6.79  2000/02/17 19:00:44  shavirin
325 * Removed theCacheSize parameter from everywhere.
326 *
327 * Revision 6.78  2000/02/15 19:06:09  shavirin
328 * Added parameter filter_string into BLAST_ParameterBlk structure.
329 *
330 * Revision 6.77  2000/02/02 18:21:51  madden
331 * Add LinkHelpStruct definition
332 *
333 * Revision 6.76  2000/02/02 16:52:43  dondosha
334 * Added option one_line_results to BLAST_OptionsBlk and BLAST_ParameterBlk
335 *
336 * Revision 6.75  2000/02/01 18:02:22  dondosha
337 * Added greedy alignment option to BLAST_OptionsBlk and query context offsets array to BlastSearchBlk
338 *
339 * Revision 6.74  2000/01/26 22:00:52  madden
340 * Added subject_index field to SWResults
341 *
342 * Revision 6.73  2000/01/20 19:12:00  madden
343 * Change BLAST version and date
344 *
345 * Revision 6.72  2000/01/13 18:10:43  madden
346 * Fix problem with incorrect stat values for blastn and missing hits
347 *
348 * Revision 6.71  2000/01/11 17:02:48  shavirin
349 * Added element theCacheSize into BLAST_OptionsBlk and BLAST_ParameterBlk.
350 *
351 * Revision 6.70  1999/12/31 14:23:19  egorov
352 * Add support for using a mixture of real and mask databases with gi-list files:
353 * 1. Change logic of creating rdfp list.
354 * 2. BlastGetDbChunk gets real databases first, then masks.
355 * 3. Proper calculation of database sizes using alias files.
356 * 4. Change to CommonIndex to support using of mask databases.
357 * 5. Use correct gis in formatted output (BlastGetAllowedGis()).
358 * 6. Other small changes
359 *
360 * Revision 6.69  1999/12/21 20:04:15  egorov
361 * gi_list now contains start position for corresponding database
362 *
363 * Revision 6.68  1999/11/30 18:23:08  shavirin
364 * Added parameter max_num_patterns to the BLAST_OptionsBlkPtr structure
365 *
366 * Revision 6.67  1999/11/15 22:03:31  madden
367 * added Boolean isFirstAlignment to SWResults
368 *
369 * Revision 6.66  1999/11/12 20:57:39  shavirin
370 * Added parameter use_best_align into BLAST_ParameterBlkPtr
371 *
372 * Revision 6.65  1999/11/12 16:37:30  shavirin
373 * Added new option use_best_align into Blast options.
374 *
375 * Revision 6.64  1999/10/26 20:45:19  madden
376 * Add use_real_db_size option
377 *
378 * Revision 6.63  1999/10/05 17:42:54  shavirin
379 * Removed global variables from blast.c
380 *
381 * Revision 6.62  1999/09/28 20:14:31  madden
382 * Joerg changes to minimize cache misses
383 *
384 * Revision 6.61  1999/08/31 13:42:23  madden
385 * Moved SWResults to blastdef.h from profiles.h
386 *
387 * Revision 6.60  1999/08/27 18:07:33  shavirin
388 * Passed parameter decline_align from top to the engine.
389 *
390 * Revision 6.59  1999/08/26 14:56:49  madden
391 * Raise version and date
392 *
393 * Revision 6.58  1999/08/26 14:55:16  madden
394 * Fixed Int8 problem
395 *
396 * Revision 6.57  1999/08/20 19:47:41  madden
397 * removed version element
398 *
399 * Revision 6.56  1999/08/17 18:37:12  shavirin
400 * Added phi_pattern element into options block.
401 *
402 * Revision 6.55  1999/08/17 14:02:34  madden
403 * add smith_waterman and tweak_parameters fields to Options
404 *
405 * Revision 6.54  1999/05/10 18:47:52  madden
406 * Changed version to 2.0.9
407 *
408 * Revision 6.53  1999/05/08 15:04:24  madden
409 * Changed version and release date
410 *
411 * Revision 6.52  1999/04/23 19:25:01  madden
412 * Fixes a prototype complaint
413 *
414 * Revision 6.51  1999/04/23 16:45:54  madden
415 * call BQ_IncSemaphore as callback
416 *
417 * Revision 6.50  1999/04/22 16:46:13  shavirin
418 * Added semaphore ID to the search_blk structure.
419 *
420 * Revision 6.49  1999/04/01 21:42:47  madden
421 * Fix memory leaks when gi list is used
422 *
423 * Revision 6.48  1999/03/18 21:13:32  egorov
424 * The "output" field added to search block.  This is VoidPtr and an application can
425 * use it as stream, ASNIO, etc to output blast results.
426 *
427 * Revision 6.47  1999/03/17 16:49:11  madden
428 * Removed comment within comment
429 *
430 * Revision 6.46  1999/02/17 13:23:01  madden
431 * Added hsp_num_max
432 *
433 * Revision 6.45  1999/01/28 16:04:56  madden
434 * do_not_reallocate Boolean for HSPs
435 *
436 * Revision 6.44  1999/01/26 17:56:37  madden
437 * query_id added to HitRange
438 *
439 * Revision 6.43  1999/01/05 13:57:19  madden
440 * Changed version and release date
441 *
442  * Revision 6.42  1998/12/31 18:17:03  madden
443  * Added strand option
444  *
445  * Revision 6.41  1998/12/29 17:45:06  madden
446  * Add do_sum_stats flag
447  *
448  * Revision 6.40  1998/12/21 13:09:53  madden
449  * Changed version and release date
450  *
451  * Revision 6.39  1998/11/04 01:36:05  egorov
452  * Add support for entrez-query and org-name to blast3
453  *
454  * Revision 6.38  1998/09/16 18:58:57  madden
455  * Changed release number and date
456  *
457  * Revision 6.37  1998/09/14 15:11:15  egorov
458  * Add support for Int8 length databases; remove unused variables
459  *
460  * Revision 6.36  1998/07/30 19:00:32  madden
461  * Change to allow search of subset of database
462  *
463  * Revision 6.35  1998/07/28 21:17:59  madden
464  * Added do_not_reevaluate
465  *
466  * Revision 6.34  1998/07/25 14:26:38  madden
467  * Added comments
468  *
469  * Revision 6.33  1998/07/22 12:16:25  madden
470  * Added handle_results
471  *
472  * Revision 6.32  1998/07/21 20:58:04  madden
473  * Changes to allow masking at hash only
474  *
475  * Revision 6.31  1998/07/17 15:39:56  madden
476  * Changes for Effective search space.
477  *
478  * Revision 6.30  1998/07/14 20:17:05  egorov
479  * Add two new parameters (gilist and gifile) to BLAST_OptionsBlk
480  *
481  * Revision 6.29  1998/06/17 18:10:07  madden
482  * Added isPatternSearch to Options
483  *
484  * Revision 6.28  1998/06/12 16:08:49  madden
485  * BlastHitRange stuff
486  *
487  * Revision 6.27  1998/05/28 19:59:16  madden
488  * Added typedef for BLASTHeapStruct
489  *
490  * Revision 6.26  1998/05/17 16:28:43  madden
491  * Allow changes to filter options and cc filtering.
492  *
493  * Revision 6.25  1998/05/05 13:56:38  madden
494  * Raised version to 2.0.5 and changed date
495  *
496  * Revision 6.24  1998/04/24 19:27:05  madden
497  * Added BlastMatrixRescalePtr
498  *
499  * Revision 6.23  1998/04/01 22:47:14  madden
500  * Added query_invalid flag
501  *
502  * Revision 6.22  1998/03/24 15:38:22  madden
503  * Use BlastDoubleInt4Ptr to keep track of gis and ordinal_ids
504  *
505  * Revision 6.21  1998/03/18 14:14:20  madden
506  * Support random access by gi list
507  *
508  * Revision 6.20  1998/03/14 18:29:21  madden
509  * Added BlastSeqIdListPtr
510  *
511  * Revision 6.19  1998/02/26 22:34:37  madden
512  * Changes for 16 bit windows
513  *
514  * Revision 6.18  1998/02/26 19:10:37  madden
515  * Removed elements with BLAST_COLLECT_SPECIAL_STATS defines
516  *
517  * Revision 6.17  1998/02/24 22:46:29  madden
518  * Added perform_culling Boolean and changed release date
519  *
520  * Revision 6.16  1998/02/19 17:17:10  madden
521  * Use of Int4 rather than Int2 when pruning SeqAlign
522  *
523  * Revision 6.15  1998/01/05 16:46:52  madden
524  * One or both strands can be searched, as opposed to only both, changes to number of contexts
525  *
526  * Revision 6.14  1997/12/23 19:14:14  madden
527  * release version to 2.0.4
528  *
529  * Revision 6.13  1997/12/23 18:12:32  madden
530  * Changes for range-dependent blast
531  *
532  * Revision 6.12  1997/12/12 20:38:02  madden
533  * Fix to comments
534  *
535  * Revision 6.11  1997/12/11 22:20:16  madden
536  * Corrected blast_type defines
537  *
538  * Revision 6.10  1997/12/10 22:41:40  madden
539  * program number defines
540  *
541  * Revision 6.9  1997/11/14 21:30:16  madden
542  * Changed version and date
543  *
544  * Revision 6.8  1997/10/26 17:26:59  madden
545  * Changes for range dependent limits
546  *
547  * Revision 6.7  1997/10/01 13:35:28  madden
548  * Changed BLAST_VERSION to BLAST_ENGINE_VERSION
549  *
550  * Revision 6.6  1997/09/22 17:36:24  madden
551  * MACROS for position-specific matrices from Andy Neuwald
552  *
553  * Revision 6.5  1997/09/18 22:22:12  madden
554  * Added prune functions
555  *
556  * Revision 6.4  1997/09/11 18:49:26  madden
557  * Changes to enable searches against multiple databases.
558  *
559  * Revision 6.3  1997/09/10 21:27:57  madden
560  * Changes to set CPU limits
561  *
562  * Revision 6.2  1997/09/03 19:06:35  madden
563  * changed BLAST_VERSION and BLAST_RELEASE_DATE
564  *
565  * Revision 6.1  1997/08/27 14:46:48  madden
566  * Changes to enable multiple DB searches
567  *
568  * Revision 6.0  1997/08/25 18:52:32  madden
569  * Revision changed to 6.0
570  *
571  * Revision 1.63  1997/08/20 21:43:10  madden
572  * Updated release date
573  *
574  * Revision 1.62  1997/07/21 17:37:15  madden
575  * Added define for BLAST_RELEASE_DATE
576  *
577  * Revision 1.61  1997/07/18 20:55:45  madden
578  * Added BLAST_VERSION
579  *
580  * Revision 1.60  1997/07/15 20:36:43  madden
581  * Added ValNodePtr mask
582  *
583  * Revision 1.59  1997/07/14 15:33:00  madden
584  * typedef for BlastErrorMsg
585  *
586  * Revision 1.58  1997/05/22 21:24:52  madden
587  * Added support for final gapX dropoff value
588  *
589  * Revision 1.57  1997/05/20 17:51:33  madden
590  * Added element SeqLocPtr query_slp to BlastSearch
591  *
592  * Revision 1.56  1997/05/06 22:19:35  madden
593  * Added use_large_gaps and subject_length
594  *
595  * Revision 1.55  1997/04/09  20:01:53  madden
596  * Added seqid_list to SearchBlk
597  *
598  * Revision 1.54  1997/04/03  19:48:13  madden
599  * Changes to use effective database length instead of the length of each
600  * sequence in statistical calculations.
601  *
602  * Revision 1.53  1997/03/31  17:07:57  madden
603  * Added BLAST_COLLECT_STATS define.
604  *
605  * Revision 1.52  1997/03/20  22:56:24  madden
606  * Added gap_info to hsp.
607  *
608  * Revision 1.51  1997/03/14  22:06:11  madden
609  * fixed MT bug in BlastReevaluateWithAmbiguities.
610  *
611  * Revision 1.50  1997/03/08  16:52:16  madden
612  * y
613  * Added discontinuous option to ParameterBlk.
614  *
615  * Revision 1.49  1997/02/25  19:17:05  madden
616  * Added discontinuous flag to options.
617  *
618  * Revision 1.48  1997/02/23  16:44:47  madden
619  * GapAlignBlkPtr added to search structure.
620  *
621  * Revision 1.47  1997/02/20  18:38:34  madden
622  * Added Int4 db_length to Options block.
623  *
624  * Revision 1.46  1997/02/18  21:03:00  madden
625  * Added #define FILTER_NONE 0.
626  *
627  * Revision 1.45  1997/02/17  17:40:18  madden
628  * Added seqalign to ResultHitlistptr
629  *
630  * Revision 1.44  1997/02/11  19:30:54  madden
631  * Added program_name to Options.
632  *
633  * Revision 1.43  1997/02/10  20:27:01  madden
634  * Changed some CharPtr's into Uint1Ptr's.
635  *
636  * Revision 1.42  1997/02/10  20:14:23  madden
637  * replaced doubles by Nlm_FloatHi's.
638  *
639  * Revision 1.41  1997/02/10  20:03:58  madden
640  * Added specific to BlastAllWordsPtr.
641  *
642  * Revision 1.40  1997/02/10  15:36:40  madden
643  * added posConverged to the BlastSearchBlk.
644  *
645  * Revision 1.39  1997/02/06  14:27:15  madden
646  * Addition of BlastAllWord structure.
647  *
648  * Revision 1.38  1997/02/03  13:02:12  madden
649  * Added length to BLASTSubjectInfo.
650  *
651  * Revision 1.37  1997/01/17  17:41:44  madden
652  * Added flags for position based BLAST.
653  *
654  * Revision 1.36  1997/01/13  15:37:05  madden
655  * Changed prototypes for star_callback and tick_callback.
656  *
657  * Revision 1.35  1997/01/11  18:22:10  madden
658  * Changes to allow S2 to be set.
659  *
660  * Revision 1.34  1997/01/09  17:44:35  madden
661  * Added "bit_score" to BLASTResultHsp.
662  *
663  * Revision 1.33  1996/12/27  20:44:10  madden
664  * Changes to require that part of the query be included.
665  *
666  * Revision 1.32  1996/12/23  14:04:44  madden
667  * Added gap_trigger.
668  *
669  * Revision 1.31  1996/12/20  21:11:40  madden
670  * Changes to allow multiple hits runs only.
671  *
672  * Revision 1.30  1996/12/18  14:33:13  madden
673  * Added high_score element.
674  *
675  * Revision 1.29  1996/12/17  17:27:03  madden
676  * Count number of attempted gappings.
677  *
678  * Revision 1.28  1996/12/17  13:47:57  madden
679  * Added star_proc.
680  *
681  * Revision 1.27  1996/12/16  14:35:48  madden
682  * Added gapped_calculation Boolean
683  *
684  * Revision 1.26  1996/12/13  22:00:23  madden
685  * Corrected starting point for gapped extension with traceback.
686  *
687  * Revision 1.25  1996/12/13  18:13:56  madden
688  * Added tick callback functions
689  *
690  * Revision 1.24  1996/12/13  15:09:31  madden
691  * Changes to parameters used for gapped extensions.
692  *
693  * Revision 1.23  1996/12/09  23:24:05  madden
694  * Added parameters to control which sequences get a gapped alignment.
695  *
696  * Revision 1.22  1996/12/08  15:19:59  madden
697  * Added parameters for gapped alignments.
698  *
699  * Revision 1.21  1996/11/27  21:56:57  madden
700  * Removed define for XNU.
701  *
702  * Revision 1.20  1996/11/18  18:07:57  madden
703  * *** empty log message ***
704  *
705  * Revision 1.19  1996/11/18  17:28:13  madden
706  * Added BLAST_SEARCH_ALLOC_TRANS_INFO define.
707  *
708  * Revision 1.18  1996/11/18  15:45:40  madden
709  * Defines for filter type added (by S. Shavirin).
710  *
711  * Revision 1.17  1996/11/15  17:54:54  madden
712  * Added support for alternate genetic codes for blastx, tblast[nx].
713  *
714  * Revision 1.16  1996/11/13  22:35:18  madden
715  * Added genetic_code and db_genetic_code elements to blastdef.h
716  *
717  * Revision 1.15  1996/11/12  16:21:53  madden
718  * Added context_factor
719  *
720  * Revision 1.14  1996/11/06  22:10:01  madden
721  * translation_buffer changed from CharPtr to Uint1Ptr.
722  *
723  * Revision 1.13  1996/11/04  16:59:43  madden
724  * Added translation_table and translation_table_rc elements
725  * to BlastSearchBlk.
726  *
727  * Revision 1.12  1996/10/03  20:49:29  madden
728  * Added xsum member to HSP_Link structure.
729  * ,.
730  *
731  * Revision 1.11  1996/10/01  21:24:02  madden
732  * Added e2.
733  *
734  * Revision 1.10  1996/09/26  13:02:32  madden
735  * Removed ifdef for BLAST_COLLECT_STATS with counters.
736  *
737  * Revision 1.9  1996/09/12  21:13:46  madden
738  * *** empty log message ***
739  *
740  * Revision 1.8  1996/09/11  22:21:51  madden
741  * *** empty log message ***
742  *
743  * Revision 1.7  1996/09/11  19:14:09  madden
744  * Added BLAST_OptionsBlkPtr structure and use thereof.
745  *
746  * Revision 1.6  1996/08/14  18:16:13  madden
747  * removed frame from Context.
748  *
749  * Revision 1.5  1996/08/14  17:19:02  madden
750  * Added frame to BlastSeqBlkPtr.
751  *
752  * Revision 1.4  1996/08/13  15:26:29  madden
753  * Changes for tblastn.
754  *
755  * Revision 1.3  1996/08/09  22:11:12  madden
756  * Added original_sequence to BlastSequenceBlk.
757  *
758  * Revision 1.2  1996/08/07  14:24:42  madden
759  * Removed include for blast18p.h and objblst2.h
760  *
761  * Revision 1.1  1996/08/05  20:32:18  madden
762  * Initial revision
763  *
764  * Revision 1.51  1996/08/02  14:20:06  madden
765  * Removed multiproc structure.
766  *
767  * Revision 1.50  1996/07/31  13:09:17  madden
768  * Changes for threaded blast.
769  *
770  * Revision 1.49  1996/07/24  12:01:28  madden
771  * Changes for blastx
772  *
773  * Revision 1.48  1996/07/18  22:00:49  madden
774  * Addition of BLAST_ExtendWordParams structure.
775  *
776  * Revision 1.47  1996/07/18  13:36:34  madden
777  * Addition of the BLASTContextStructPtr.
778  *
779  * Revision 1.46  1996/07/16  14:37:42  madden
780  * Removed _blast_link_structure .
781  *
782  * Revision 1.45  1996/07/11  16:03:58  madden
783  * SaveCurrentHitlist keeps track of which set an HSP belongs to.
784  *
785  * Revision 1.44  1996/07/02  14:33:16  madden
786  * Added hspcnt_max.
787  *
788  * Revision 1.43  1996/07/02  12:04:15  madden
789  * HSP's saved on array, rather than linked list.
790  *
791  * Revision 1.42  1996/06/26  19:38:12  madden
792  * Removed ifdef.
793  *
794  * Revision 1.41  1996/06/24  20:26:46  madden
795  * Added dropoff_1st_pass and dropoff_2nd_pass to ParameterBlkPtr.
796  *
797  * Revision 1.40  1996/06/24  17:58:21  madden
798  * Removed X_set parameter, added right and left dropoff's.
799  *
800  * Revision 1.39  1996/06/20  16:15:57  madden
801  * Replaced int's with Int4's.
802  *
803  * Revision 1.38  1996/06/19  14:19:53  madden
804  * Added define for BLASTSubjectInfoPtr.
805  *
806  * Revision 1.37  1996/06/17  19:03:07  madden
807  * Removed unused structure.
808  *
809  * Revision 1.36  1996/06/14  17:58:13  madden
810  * Changes to avoid nulling out arrays for every sequence.
811  *
812  * Revision 1.35  1996/06/13  21:03:06  madden
813  * Added actual_window element to ExtendWord structure.
814  *
815  * Revision 1.34  1996/06/11  17:58:31  madden
816  * Changes to allow shorter arrays for multiple hits type blast.
817  *
818  * Revision 1.33  1996/06/10  16:52:16  madden
819  * Use bit-shifting and masking instead of dividing and remainder.
820  *
821  * Revision 1.32  1996/06/10  13:44:07  madden
822  * Changes to reduce the size of the "already visited" array.
823  *
824  * Revision 1.31  1996/06/06  17:55:16  madden
825  * Added number_of_bits to ParameterBlkPtr.
826  *
827  * Revision 1.30  1996/06/06  13:23:17  madden
828  * Added elements cutoff_big_gap and ignore_small_gaps to ParameterBlkPtr.
829  *
830  * Revision 1.29  1996/05/29  12:44:04  madden
831  * Added structure BlastTimeKeeper.
832  *
833  * Revision 1.28  1996/05/28  14:16:32  madden
834  * Added Int4's to collect statistics info.
835  *
836  * Revision 1.27  1996/05/23  21:55:04  madden
837  * Removed unused variable initlen
838  *
839  * Revision 1.26  1996/05/23  21:48:23  madden
840  * Removed unused defines.
841  *
842  * Revision 1.25  1996/05/16  19:51:09  madden
843  * Added documentation block.
844  *
845  * Revision 1.24  1996/05/16  13:29:38  madden
846  * Added defines for contiguous or discontiguous calls.
847  *
848  * Revision 1.23  1996/05/01  15:00:00  madden
849  * Added BlastResults structure defs.
850  *
851  * Revision 1.22  1996/04/24  16:17:26  madden
852  * Added new structure, BLAST_Link.
853  *
854  * Revision 1.21  1996/04/24  12:52:48  madden
855  * ID's for sequences simplified.
856  *
857  * Revision 1.20  1996/04/03  19:14:35  madden
858  * Removed defunct HSP ptr's.
859  *
860  * Revision 1.19  1996/03/29  21:27:43  madden
861  * "hitlist" now kept on SeqAlign rather than HitList.
862  *
863  * Revision 1.17  1996/03/27  19:51:53  madden
864  * "current_hitlist" added to Search Structure.
865  *
866  * Revision 1.16  1996/03/26  19:36:59  madden
867  * Added  ReadDBFILEPtr to Search structure.
868  *
869  * Revision 1.15  1996/03/25  16:35:18  madden
870  * Added old_stats.
871  *
872  * Revision 1.14  1996/02/28  21:37:43  madden
873  * Added "trim" variables to segments for HSP.
874  *
875  * Revision 1.13  1996/02/06  22:51:13  madden
876  * Added "prelim" to BlastSearch
877  *
878  * Revision 1.12  1996/02/02  19:25:32  madden
879  * Added wfp_first and wfp_second to BlastParameterBlk for first and second pass.
880  *
881  * Revision 1.11  1996/01/29  21:12:07  madden
882  * *** empty log message ***
883  *
884  * Revision 1.10  1996/01/23  16:31:47  madden
885  * e_cutoff changed from BLAST_Score to double in ParameterBlk.
886  *
887  * Revision 1.9  1996/01/17  17:00:40  madden
888  * Added gap parameters to ParameterBlk, dblen to SearchBlk.
889  *
890  * Revision 1.8  1996/01/17  13:45:58  madden
891  * Added gap_prob and gap_decay_rate to ParameterBlk.
892  *
893  * Revision 1.7  1996/01/11  15:17:36  madden
894  * Added process_num to ParameterBlk.
895  *
896  * Revision 1.6  1996/01/08  23:23:55  madden
897  * removed "len" from HSP.
898  *
899  * Revision 1.5  1996/01/06  18:57:47  madden
900  * Added BLAST_HSP_LINK structure.
901  *
902  * Revision 1.4  1995/12/28  21:26:05  madden
903  * *** empty log message ***
904  *
905  * Revision 1.3  1995/12/26  23:04:14  madden
906  * Added parameters to BlastParameterBlk.
907  *
908  * Revision 1.2  1995/12/21  23:10:41  madden
909  * BLAST_Score prototypes moved to blastkar.h.
910  *
911  * Revision 1.1  1995/12/19  22:33:06  madden
912  * Initial revision
913  *
914  * Revision 1.1  1995/12/08  15:48:23  madden
915  * Initial revision
916  *
917  * */
918 #ifndef __BLASTSTR__
919 #define __BLASTSTR__
920 
921 #include <ncbi.h>
922 #include <lookup.h>
923 #include <blastkar.h>
924 #include <objalign.h>
925 #include <sequtil.h>
926 #include <readdb.h>
927 #include <gapxdrop.h>
928 #include <mbalign.h>
929 
930 #ifdef __cplusplus
931 extern "C" {
932 #endif
933 
/* Numeric codes for the BLAST program types.
   (Translated in BlastGetProgramNumber.) */
#define blast_type_undefined    0
#define blast_type_blastn       1
#define blast_type_blastp       2
#define blast_type_blastx       3
#define blast_type_tblastn      4
#define blast_type_tblastx      5
#define blast_type_psitblastn   6
942 
943 
/* Values for strand_option; they determine which strand of the query
   is compared. */
#define BLAST_TOP_STRAND        1
#define BLAST_BOTTOM_STRAND     2
#define BLAST_BOTH_STRAND       3
948 
/* Memory-ownership flags: they specify whether BLAST should delete some
   memory itself or leave that up to the caller. */
#define BLAST_OWN       0
#define BLAST_NOT_OWN   1
954 
/* Minimum search space size for an awake thread. */
#define AWAKE_THR_MIN_SIZE 2.0e12
957 
958 #ifndef _BLASTCONCAT_
959 #include "blastconcat.h"
960 #endif
961 /* --KM concat */
962 
/* Default option values, used when creating a blast options block and as
 * the defaults of the command-line programs.  When changing any of them,
 * please remember to update the defaults in the command-line programs too. */

/* Multiple-hit window size. */
#define WINDOW_SIZE_PROT                40
#define WINDOW_SIZE_NUCL                0
#define WINDOW_SIZE_MEGABLAST           0

/* Word size. */
#define WORDSIZE_PROT                   3
#define WORDSIZE_NUCL                   11
#define WORDSIZE_MEGABLAST              28

/* Gap opening cost.  The protein gap costs are the defaults for the
 * BLOSUM62 scoring matrix; more gap costs are listed in
 * BLASTOptionSetGapParams. */
#define GAP_OPEN_PROT                   11
#define GAP_OPEN_NUCL                   5
#define GAP_OPEN_MEGABLAST              0

/* Gap extension cost. */
#define GAP_EXTN_PROT                   1
#define GAP_EXTN_NUCL                   2
#define GAP_EXTN_MEGABLAST              0

/* Word threshold, per program. */
#define WORD_THRESHOLD_BLASTP           11
#define WORD_THRESHOLD_BLASTN           0
#define WORD_THRESHOLD_BLASTX           12
#define WORD_THRESHOLD_TBLASTN          13
#define WORD_THRESHOLD_TBLASTX          13
#define WORD_THRESHOLD_MEGABLAST        0

/* X-dropoff for ungapped extensions. */
#define UNGAPPED_X_DROPOFF_PROT         7
#define UNGAPPED_X_DROPOFF_NUCL         20
#define UNGAPPED_X_DROPOFF_MEGABLAST    10

/* X-dropoff for gapped extensions. */
#define GAP_X_DROPOFF_PROT              15
#define GAP_X_DROPOFF_NUCL              30
#define GAP_X_DROPOFF_MEGABLAST         20
#define GAP_X_DROPOFF_TBLASTX           0

/* X-dropoff for the final gapped alignment. */
#define GAP_X_DROPOFF_FINAL_PROT        25
#define GAP_X_DROPOFF_FINAL_NUCL        50
#define GAP_X_DROPOFF_FINAL_TBLASTX     0
1003 
/* Default mismatch penalty and match reward; these only apply to
   blastn/megablast.  The negative value is parenthesized so that the
   macro always expands as a single operand wherever it is used. */
#define PENALTY (-3)
#define REWARD 1
1007 
/********************************************************************
*
*	Defined (with no value) to request collection of BLAST stats.
*
***********************************************************************/

#define BLAST_COLLECT_STATS
1015 
1016 /********************************************************************
1017 *
1018 *	Structure to save timing info. in.  Right now this only
1019 *	works for UNIX.
1020 *
1021 ********************************************************************/
1022 
1023 typedef struct _blast_time_keeper {
1024                 FloatLo	user, 	/* CPU time in user space of the process. */
1025 			system, /* CPU time used by system. */
1026 			total;	/* total CPU time (i.e., both of the above). */
1027         } BlastTimeKeeper, PNTR BlastTimeKeeperPtr;
1028 
1029 
/***************************************************************************
  Accessor macros added by Andy Neuwald in order to allow easy modification
  of matrices.

  PtrMtrxScorePosSearch(search,row)     yields search->posMatrix[row]
  MtrxScorePosSearch(search,row,col)    yields search->posMatrix[row][col]
***************************************************************************/

#define  PtrMtrxScorePosSearch(search,row)	((search)->posMatrix[(row)])
#define  MtrxScorePosSearch(search,row,col)	((search)->posMatrix[(row)][(col)])

/***** Disabled variants that wrap the first index at query_length:
#define  MtrxScorePosSearchi2(S,x,y)	\
	((S)->posMatrix[( (x) %(S)->query_length)][(y)])
#define  PtrMtrxScorePosSearch2(S,x)	\
	((S)->posMatrix[( (x) %(S)->query_length)])
*****/
1043 
/********************************************************************

	Codes for discontiguous word hits on the 1st and 2nd pass.

********************************************************************/

#define BLAST_NO_PASS_DISCONTIG     0
#define BLAST_1ST_PASS_DISCONTIG    1
#define BLAST_2ND_PASS_DISCONTIG    2
#define BLAST_BOTH_PASS_DISCONTIG   3

/* Three is always the codon length. */
#define CODON_LENGTH                3

#define BLAST_SMALL_GAPS            0
#define BLAST_LARGE_GAPS            1
#define MAX_INTRON_LENGTH           4000
#define MAX_DBSEQ_LEN               5000000
1061 
/*********************************************************************
    Filter type codes
*********************************************************************/

#define FILTER_NONE     0
#define FILTER_DUST     1
#define FILTER_SEG      2
1069 
/* Type of the mb_disc_type option field.  The enumerators take the
   consecutive values 0, 1 and 2. */
typedef enum {
   MB_WORD_CODING,      /* 0 */
   MB_WORD_OPTIMAL,     /* 1 */
   MB_TWO_TEMPLATES     /* 2 */
} MBDiscWordType;
1075 
1076 /**********************************************************************
1077 	Structure for the blast options (available to user/programmer).
1078 	This should be filled in by the "Main" program before blast
1079 	is called.
1080 
1081     If changes are made to this structure, corresponding changes should
1082     likely be made to BLAST_WizardOptionsBlk and BLAST_WizardOptionsMask.
1083 ***********************************************************************/
1084 
1085 typedef struct _blast_optionsblk {
1086 	Nlm_FloatHi gap_decay_rate,	/* decay rate. */
1087 		    gap_prob;	/* Prob of decay. */
1088         Int4	    gap_size,	/* Small gap size. */
1089 		    window_size,/* Multiple Hits window size (zero for single hit algorithm) */
1090 		threshold_first, /* Threshold for extending hits (preliminary pass), zero if one-pass algorithm is used. */
1091 		threshold_second;/* Threshold for extending hits (second pass) */
1092 	Nlm_FloatHi	expect_value, 	/* Expectation value (E) */
1093 			e2; 	  	/* Expect value for a single HSP */
1094 	/* These two scores are zero, unless they've been set, then they set
1095 	the expect_value and e2 above. */
1096 	Int4		cutoff_s,	/* score corresponds to expect_value above.*/
1097 			cutoff_s2;	/* score corresponds to e2 above. */
1098 	Boolean two_pass_method; /* should two passes be used? */
1099 	Boolean	multiple_hits_only; /* Only the multiple hits alg. used. */
1100 	Int4	hitlist_size;	/* How many hits should be returned. */
1101 	Nlm_FloatHi number_of_bits; /* Number of bits to initiate 2nd pass (default is used if zero) */
1102 	Nlm_FloatHi	dropoff_1st_pass, /* dropoff ("X") used for 1st pass. */
1103                 dropoff_2nd_pass; /* dropoff ("X") used for 2nd pass. */
1104 	Int2	number_of_cpus;	/* How many CPU's. */
1105 	CharPtr matrix;		/* name of matrix to use. */
1106 	Boolean old_stats; /* Use old stats (option may disappear later) */
1107 	Boolean do_sum_stats;   /* Should sum statistics be used? */
1108 	Boolean use_large_gaps; /* Use only large gaps for linking HSP's with sum stats. */
1109 	Int2	wordsize;	/* size of word used to find hits. */
1110 	Int2	penalty, reward; /* penalty and reward, only for blastn */
1111 	/* The ID numbers from gc.prt are used for the genetic codes. */
1112 	Int4	genetic_code,		/* genetic code for query (blastx, tblastx) */
1113 		db_genetic_code;	/* genetic code for db (tblast[nx]). */
1114         Int4 filter;          /* filter type 0 mean no filter
1115                                  non-zero value indicate filer type */
1116 	CharPtr filter_string;	/* String specifying the type of filtering and filter options. */
1117 	Boolean		gapped_calculation; /* Is a gapped calc. being done? */
1118 	/* The next three are used ONLY for gapped alignments. */
1119 	Int4		gap_open,	/* Cost to open a gap (NO extension). */
1120 			gap_extend;	/* Cost to extend a gap one letter. */
1121         Nlm_FloatHi     gap_x_dropoff,  /* X-dropoff (in bits) used by Gapped align routine. */
1122 			gap_x_dropoff_final;	/* X-dropoff (in bits) used by Gapped align routine for FINAL alignment. */
1123         Int4            decline_align;  /* Cost for declining alignment */
1124 	Nlm_FloatHi	gap_trigger; /* Score (in bits) to gap, if an HSP gaps well. */
1125 
1126 	Boolean		discontinuous;	/* Should the SeqAlign be discontinuous.*/
1127 	/* What region of the query is required for the alignment.  If start is
1128 	zero and end is -1 (the entire query), then these are not checked. */
1129 	Int4		required_start,
1130 			required_end;
1131 	Int8		db_length;	/* database size used for stat. calcul. */
1132 	Int4		dbseq_num;	/* number of database sequences used for stat. calcul. */
1133 	Nlm_FloatHi	searchsp_eff;	/* Effective search space to be used. */
1134 
1135 	/* Options for postion based blast. */
1136 	Nlm_FloatHi	ethresh;
1137 	Int4		maxNumPasses,
1138 			pseudoCountConst;
1139 	CharPtr program_name;		/* program name, for reference. */
1140 	Int4 cpu_limit;	/* timeout total. */
1141 	/* Used for region-dependent limits when storing hits. */
1142         Int4    hsp_range_max,          /* maximum hits for a range */
1143                 block_width;            /* width of a block */
1144 	Boolean perform_culling;	/* Should results be culled at all? */
1145         Boolean isPatternSearch;        /* Is this a use of PHI-BLAST?*/
1146 	CharPtr		gifile;		/* name of file containing list of gis on server */
1147 	ValNodePtr	gilist;		/* list of gis specified by client */
1148 	Boolean		do_not_reevaluate;	/* Don't perform BlastReevaluateWithAmbiguities. */
1149 	/* These options allow a subset of the database to be examined.  IF they
1150 		are set to zero, then the entire database is examined. */
1151 	Int4		first_db_seq,		/* 1st sequence in db to be compared. */
1152 			final_db_seq;		/* Final sequence to be compared. */
1153 	CharPtr		entrez_query;	/* user specified Entrez query to make selection from databases */
1154         CharPtr		org_name;	/* user specified name of organizm;  corresponding .gil file will be used */
1155 	Uint1		strand_option;	/* BLAST_TOP_STRAND, BLAST_BOTTOM_STRAND, or BLAST_BOTH_STRAND.  used by blast[nx] and tblastx */
1156 	Int4		hsp_num_max;	/* maximum number of HSP's allowed.  Zero indicates no limit. */
1157 	Uint1		tweak_parameters, /* For composition-based statistics. */
1158 			smith_waterman;
1159         Boolean         unified_p;    /* use a combination of alignment and
1160                                          compositional p-values when evaluating
1161                                          significance; ignored unless
1162                                          composition-based statisics is on. */
1163         CharPtr         phi_pattern;      /* Pattern for PHI-Blast search */
1164 	Boolean		use_real_db_size; /* Use real DB size.  meant for use if a list of gis' is submitted,
1165 					but statistics should be based upon the real database. */
1166         Boolean         use_best_align;   /* option is to use alignments choosen by user in PSM computation API (used in WWW PSI-Blast); */
1167         Int4            max_num_patterns; /* Maximum number of patterns to be used in PHI-Blast search */
1168         Boolean         is_megablast_search; /* Is this a MegaBlast search? */
1169         Uint1         no_traceback;    /* No traceback in MegaBLAST extension */
1170         Boolean         is_rps_blast;     /* If this RPS Blast ? */
1171         SeqLocPtr       query_lcase_mask; /* Masking of input DNA regions */
1172         Boolean         sort_gi_list;     /* Should the gi list be sorted? */
1173         Boolean         is_neighboring;   /* Is this a neighboring task? */
1174         Nlm_FloatHi kappa_expect_value;   /* E-value threshold for
1175                                              hits to be saved when
1176                                              RedoAlignmentCore is used
1177                                              to compute final alignments;
1178                                              should equal expect_value for
1179                                              other types of alignment. */
1180 	Boolean		explode_seqids;	/* make one SeqAlign for every gi on a
1181 					   redundant sequence. */
1182         Boolean         megablast_full_deflines; /* Print full deflines in
1183 						   megablast one-line output */
1184         Boolean         is_ooframe;  /* Use Out-Of-Frame gapping algorithm */
1185         Int4            shift_pen;   /* Out-Of-Frame shift penalty */
1186 	Boolean		gilist_already_calculated; /* translation of gis to ordinalID's already done (used for neighboring). */
1187         Boolean  recoverCheckpoint;  /* For psitblastn */
1188         Boolean  freqCheckpoint;     /* For psitblastn */
1189         CharPtr  CheckpointFileName; /* For psitblastn */
1190         Int4     longest_intron;     /* the length of longest intron for linking HSPs */
1191         FloatLo  perc_identity;      /* Identity percentage cut-off */
1192         VoidPtr  output;             /* Output stream to put results to */
1193 	FloatHi	scalingFactor;	     /* scaling factor used when constructing pssm for rpsblast. */
1194 	Int4	total_hsp_limit;	/* total number of HSP's that will be processed to SeqAligns, zero means no limit. */
1195         Boolean mb_one_base_step; /* Scan every base of the database */
1196         Int2 mb_template_length;  /* Length of the discontiguous word */
1197         Boolean mb_use_dyn_prog;  /* Use dynamic programming gapped extension in
1198                                      megablast with affine gap scores */
1199         MBDiscWordType mb_disc_type;
1200 	Uint4 NumQueries;		/*--KM for query concatenation in [t]blastn */
1201         Boolean ignore_gilist;    /* Used in traceback stage to not lookup gi's */
1202       } BLAST_OptionsBlk, PNTR BLAST_OptionsBlkPtr;
1203 
1204 
1205 /*  --------------------------------------------------------------------
1206  *
1207  *  BLAST_WizardOptionsBlk contains those fields of BLAST_OptionsBlk
1208  *  that a user can set.
1209  *
1210  *  BLAST_WizardOptionsMask contains a Boolean for each field defined in
1211  *  BLAST_WizardOptionsBlk, except those holding pointers.  TRUE means
1212  *  that the corresponding field in BLAST_WizardOptionsBlk is set.
1213  *
1214  *  These structures are used only in conjunction with BLAST_Wizard.
1215  *
1216  *  --------------------------------------------------------------------
1217  */
1218 
1219 struct _blast_wizardoptionsblk {
1220     Int4            block_width;
1221     Int4            cutoff_s;
1222     Int4            db_genetic_code;
1223     CharPtr         entrez_query;
1224     Nlm_FloatHi     ethresh;
1225     Nlm_FloatHi     expect_value;
1226     CharPtr         filter_string;
1227     Int4            first_db_seq;
1228     Int4            final_db_seq;
1229     Int4            gap_extend;
1230     Int4            gap_open;
1231     Boolean         gapped_calculation;
1232     Int4            genetic_code;
1233     ValNodePtr      gilist;
1234     Int4            hitlist_size;
1235     Int4            hsp_range_max;
1236     Boolean         is_ooframe;
1237     CharPtr         matrix;
1238     MBDiscWordType  mb_disc_type;
1239     Int2            mb_template_length;
1240     Uint1           no_traceback;
1241     Int2            penalty;
1242     FloatLo         perc_identity;
1243     Boolean         perform_culling;
1244     CharPtr         phi_pattern;
1245     Int4            pseudoCountConst;
1246     SeqLocPtr       query_lcase_mask;
1247     Int4            required_end;
1248     Int4            required_start;
1249     Int2            reward;
1250     Int8            db_length;
1251     Nlm_FloatHi     searchsp_eff;
1252     Boolean         smith_waterman;
1253     Uint1           strand_option;
1254     Int4            threshold_first;
1255     Int4            threshold_second;
1256     Uint1           tweak_parameters;
1257     Boolean         use_best_align;
1258     Boolean         use_real_db_size;
1259     Int4            window_size;
1260     Int2            wordsize;
1261 
1262     Boolean         two_hits;
1263     CharPtr         string_options;
1264 };
1265 
1266 typedef struct _blast_wizardoptionsblk
1267             BLAST_WizardOptionsBlk,
1268     PNTR    BLAST_WizardOptionsBlkPtr;
1269 
1270 struct _blast_wizardoptionsmask {
1271     Boolean         block_width;
1272     Boolean         cutoff_s;
1273     Boolean         db_genetic_code;
1274     Boolean         ethresh;
1275     Boolean         expect_value;
1276     Boolean         first_db_seq;
1277     Boolean         final_db_seq;
1278     Boolean         gap_extend;
1279     Boolean         gap_open;
1280     Boolean         gapped_calculation;
1281     Boolean         genetic_code;
1282     Boolean         hitlist_size;
1283     Boolean         hsp_range_max;
1284     Boolean         is_ooframe;
1285     Boolean         mb_disc_type;
1286     Boolean         mb_template_length;
1287     Boolean         no_traceback;
1288     Boolean         penalty;
1289     Boolean         perc_identity;
1290     Boolean         perform_culling;
1291     Boolean         pseudoCountConst;
1292     Boolean         required_end;
1293     Boolean         required_start;
1294     Boolean         reward;
1295     Boolean         db_length;
1296     Boolean         searchsp_eff;
1297     Boolean         smith_waterman;
1298     Boolean         strand_option;
1299     Boolean         threshold_first;
1300     Boolean         threshold_second;
1301     Boolean         tweak_parameters;
1302     Boolean         use_best_align;
1303     Boolean         use_real_db_size;
1304     Boolean         window_size;
1305     Boolean         wordsize;
1306 
1307     Boolean         two_hits;
1308 };
1309 
1310 typedef struct _blast_wizardoptionsmask
1311             BLAST_WizardOptionsMask,
1312     PNTR    BLAST_WizardOptionsMaskPtr;
1313 
/* Type of a discontiguous template (see the template_type field of
   MegaBlastParameterBlk).  TEMPL_ERROR is -1; the remaining enumerators
   count upwards from 0 in the order listed. */
typedef enum {
   TEMPL_ERROR = -1,
   TEMPL_11_16,         /*  0 */
   TEMPL_12_16,         /*  1 */
   TEMPL_11_18,         /*  2 */
   TEMPL_12_18,         /*  3 */
   TEMPL_11_21,         /*  4 */
   TEMPL_12_21,         /*  5 */
   TEMPL_11_16_OPT,     /*  6 */
   TEMPL_12_16_OPT,     /*  7 */
   TEMPL_11_18_OPT,     /*  8 */
   TEMPL_12_18_OPT,     /*  9 */
   TEMPL_11_21_OPT,     /* 10 */
   TEMPL_12_21_OPT      /* 11 */
} MBTemplateType;
1329 
1330 typedef struct _mb_parameter_blk_ {
1331    Uint1 no_traceback;    /* No traceback in greedy extension */
1332    Boolean is_neighboring;  /* Is this a neighboring task? */
1333    Boolean full_seqids;     /* Print full seqids in tabular output? */
1334    FloatLo perc_identity;   /* Identity percentage cut-off */
1335    Int4    max_positions;   /* Maximal number of positions in query of a given word */
1336    Boolean disc_word;       /* Use a discontiguous word template to find initial
1337                                matches */
1338    Boolean one_base_step;   /* Form words for every position in the database
1339                                sequence (default is every 4th position) */
1340    Int2    word_weight;     /* Number of identical nucleotides in a word match */
1341    Int2    template_length; /* Length of a discontiguous word template */
1342    Boolean use_dyn_prog;    /* Use dynamic programming extension for affine gap
1343                                scores */
1344    MBTemplateType template_type; /* Type of a discontiguous template */
1345    Boolean use_two_templates;
1346 } MegaBlastParameterBlk, PNTR MegaBlastParameterBlkPtr;
1347 
1348 /****************************************************************************
1349 
1350 	PARAMETER BLOCK: parameters for the BLAST search entered by on
1351 	command line by user.
1352 
1353 *****************************************************************************/
1354 
1355 typedef struct _blast_parameterblk {
1356         BLAST_Score     threshold,      /* threshold for extending a word hit*/
1357         		threshold_first, /* threshold for 1st pass. */
1358         		threshold_second, /* threshold for 2nd pass. */
1359                         X,              /* drop-off score for extension. */
1360 			dropoff_1st_pass, /* dropoff ("X") used for 1st pass. */
1361 			dropoff_2nd_pass, /* dropoff ("X") used for 2nd pass. */
1362                         cutoff_s,	/* Final Score to report a hit. */
1363                         cutoff_s1,	/* Score to save an HSP after a gapped extension. */
1364                         cutoff_s2,	/* Score to save an HSP after an ungapped extension. */
1365 			cutoff_s_first, /* Score (S2) to use on 1st pass */
1366 			cutoff_s_second, /* Score (S2) to use on 2nd pass and
1367 			   for "small" gaps in link_hsps (in blast.c) */
1368 	/* Max value of s2, used if s2 is set or s2 becomes larger than s. */
1369 			cutoff_s2_max,
1370 			cutoff_big_gap; /* cutoff value for a "big" gap in
1371 			   link_hsps (in blast.c). */
1372 	Nlm_FloatHi	cutoff_e,	/* Expect value to report a hit. */
1373                         cutoff_e2,	/* Expect value to report a hsp. */
1374 			number_of_bits; /* number of bits of significance, used
1375 			   to calculate cutoff_s_first (above). */
1376 	Boolean		threshold_set, /*TRUE if threshold set on command-line*/
1377 			cutoff_s_set,	/* TRUE if cutoff score set on c-l */
1378 			cutoff_s2_set,	/* TRUE if cutoff score2 set on c-l */
1379 			cutoff_e_set,	/* TRUE if cutoff expect set on c-l */
1380 			cutoff_e2_set,	/* TRUE if cutoff expect2 set on c-l */
1381 			ignore_small_gaps, /* ignore small gaps if TRUE, set by
1382 			   CalculateSecondCutoffScore in blast.c if the search
1383 			   space is smalled than 8*gap_size*gap_size. */
1384 			window_size_set;/* TRUE if window size set for MHBLAST*/
1385         Boolean         sump_option;    /* TRUE if sump is used. */
1386 	Int4		gap_size,	/* max. gap allowed for small gaps.*/
1387 			window_size;	/* used for multiple hits BLAST. */
1388 	Nlm_FloatHi	gap_prob; 	/* prob. of gap of size "gap" (above).*/
1389 	Nlm_FloatHi	gap_decay_rate; /* prob. of only one HSP */
1390 	Int2		process_num;	/* max # processrs permitted (for MP).*/
1391 	Boolean		old_stats;	/* Use "old" stats if TRUE. */
1392 	Boolean 	do_sum_stats;   /* Should sum statistics be used? */
1393 	Boolean         use_large_gaps; /* Use only large gaps for linking HSP's with sum stats. */
1394 	Boolean		two_pass_method; /* should two passes be used? */
1395 	Boolean		multiple_hits_only; /* Only the multiple hits alg. used. */
1396 	Boolean		discontinuous;	/* Should discontinuous SeqAlign's be produced? */
1397 	Boolean		gapped_calculation; /* Is a gapped calc. being done? */
1398 	Boolean		do_not_reevaluate;	/* Don't perform BlastReevaluateWithAmbiguities. */
1399 	/* The next three are used ONLY for gapped alignments. */
1400 	Int4		gap_open,	/* Cost to open a gap (NO extension). */
1401 			gap_extend,	/* Cost to extend a gap one letter. */
1402 			gap_x_dropoff,	/* X-dropoff used by Gapped align routine. */
1403 			gap_x_dropoff_final;	/* X-dropoff (in bits) used by Gapped align routine for FINAL alignment. */
1404         Int4            decline_align;  /* Cost for declining alignment */
1405 
1406 	Nlm_FloatHi	gap_trigger; /* Score (in bits) to gap, if an HSP gaps well.*/
1407 
1408 	/* Options for postion based blast. */
1409 	Nlm_FloatHi	ethresh;
1410 	Int4		maxNumPasses,
1411 			pseudoCountConst;
1412 	Int4 cpu_limit;	/* timeout total. */
1413         Int4    hsp_range_max,          /* maximum hits for a range */
1414 		max_pieces;		/* Max number of pieces allowed (query_length/block_width) */
1415 	Boolean perform_culling;	/* determines whether culling should be used or not.
1416 					If not, then hsp_range_max, block_width, and max_pieces are ignored. */
1417 	/* These options allow a subset of the database to be examined.  IF they
1418 		are set to zero, then the entire database is examined. */
1419 	Int4		first_db_seq,		/* 1st sequence in db to be compared. */
1420 			final_db_seq;		/* Final sequence to be compared. */
1421 	Int4		hsp_num_max;	/* maximum number of HSP's allowed.  Zero indicates no limit. */
1422         Boolean   use_best_align;   /* option is to use alignments choosen by user in PSM computation API (used in WWW PSI-Blast); */
1423         MegaBlastParameterBlkPtr mb_params;  /* Is this a MegaBlast search? */
1424         CharPtr filter_string;  /* String specifying the type of filtering and filter options. - used with Translated RPS Blast */
1425         Boolean is_rps_blast;      /* If this RPS Blast ? */
1426         SeqLocPtr  query_lcase_mask; /* Masking of input DNA regions */
1427 	Boolean		explode_seqids;	/* make one SeqAlign for every gi on a
1428 					   redundant sequence. */
1429         Boolean         is_ooframe;  /* Use Out-Of-Frame gapping algorithm */
1430         Int4            shift_pen;  /* Out-Of-Frame shift penalty */
1431         Int4    longest_intron;     /* the length of longest intron for linking HSPs */
1432 	FloatHi	scalingFactor;	     /* scaling factor used when constructing pssm for rpsblast. */
1433 	Int4	total_hsp_limit;	/* total number of HSP's that will be processed to SeqAligns, zero means no limit. */
1434         } BLAST_ParameterBlk, PNTR BLAST_ParameterBlkPtr;
1435 
1436 typedef Nlm_Int4	BLAST_Diag, PNTR BLAST_DiagPtr;
1437 
1438 /* Structure to keep track of the last hit and diag level. */
1439 
1440 typedef struct cfj_mod_struct{
1441     Int4 last_hit;
1442     Int4 diag_level;
1443 } CfjModStruct;
1444 /*
1445 	BLAST_ExtendWord contains information about which diagonals
1446 	have been extended over (i.e., which diagonals have been
1447 	tested).  This structure will be duplicated once for each
1448 	context as every context is different.
1449 */
1450 typedef struct _blast_extend_word {
1451 		Int4Ptr	_buffer; /* The "real" buffer for diag_level, version,
1452 				and last_hit arrays. */
1453                 CfjModStruct *combo_array;
1454                 Int4Ptr version; /* still needed?? */
1455 		Int4	actual_window; /* The actual window used if the multiple
1456 				hits method was used and a hit was found. */
1457 	} BLAST_ExtendWord, PNTR BLAST_ExtendWordPtr;
1458 
1459 /*
1460 	BLAST_ExtendWordParams contains parameters about the extensions.
1461 	Only one copy of this structure is needed, regardless of how many
1462 	contexts there are.
1463 */
1464 typedef struct _blast_extend_word_params {
1465 		Int4	bits_to_shift; /* how many bits should the diagonal be
1466 				shifted to get the "version" */
1467 		Int4	min_diag_length, /* Min. length of diagonal, actuall
1468 				2**bits_to_shift. */
1469 			min_diag_mask; /* Used to mask off everything above
1470 				min_diag_length (mask = min_diag_length-1). */
1471 		Int4	offset; /* "offset" added to query and subject position
1472 				so that "diag_level" and "last_hit" don't have
1473 				to be zeroed out every time. */
1474 		Int4	window;	/* The "window" size, within which two (or more)
1475 				hits must be found in order to be extended. */
1476 		/* Used by BLAST_ExtendWordNew to decide whether or not
1477 		to prepare the structure for multiple-hit type searches.
1478 		If TRUE, multiple hits are not neccessary, but possible. */
1479 		Boolean multiple_hits;
1480 	} BLAST_ExtendWordParams, PNTR BLAST_ExtendWordParamsPtr;
1481 /*
1482 	Data block to describe a single sequence.
1483 */
1484 
1485 typedef struct blast_sequence_block {
1486 	Uint1Ptr	sequence,	/* Actual (perhaps transl.) sequence. */
1487 		sequence_start; /* Start of sequence, used if the sequence is preceded by a NULLB.  Sequences
1488 				starting with a NULLB are used by BlastWordExtend_L1. */
1489 	Int4	length,		/* length of sequence. */
1490 		original_length,/* length before translation. */
1491 		effective_length;/* effective length, used only by query. */
1492 	Int2 frame;		/* frame of the sequence. */
1493 } BlastSequenceBlk, PNTR BlastSequenceBlkPtr;
1494 
1495 
1496 typedef struct _blast_seg {
1497 		Int2		frame;
1498 		Int4		offset;	/* start of hsp */
1499 		Int4		length;	/* length of hsp */
1500 		Int4		end;	/* end of HSP */
1501 		Int4		offset_trim;	/* start of trimmed hsp */
1502 		Int4		end_trim;	/* end of trimmed HSP */
1503 		/* Where the gapped extension (with X-dropoff) started. */
1504 		Int4		gapped_start;
1505 	} BLAST_Seg, PNTR BLAST_SegPtr;
1506 
/* How many methods are used to "order" the HSP's. */
#define BLAST_NUMBER_OF_ORDERING_METHODS    2
1508 
1509 
1510 /*
1511 	The following structure is used in "link_hsps" to decide between
1512 	two different "gapping" models.  Here link is used to hook up
1513 	a chain of HSP's (this is a VoidPtr as _blast_hsp is not yet
1514 	defined), num is the number of links, and sum is the sum score.
1515 	Once the best gapping model has been found, this information is
1516 	transferred up to the BLAST_HSP.  This structure should not be
1517 	used outside of the function link_hsps.
1518 */
1519 typedef struct _blast_hsp_link {
1520 		/* Used to order the HSP's (i.e., hook-up w/o overlapping). */
1521 	VoidPtr	link[BLAST_NUMBER_OF_ORDERING_METHODS];
1522 		/* number of HSP in the ordering. */
1523 	Int2	num[BLAST_NUMBER_OF_ORDERING_METHODS];
1524 		/* Sum-Score of HSP. */
1525 	Int4	sum[BLAST_NUMBER_OF_ORDERING_METHODS];
1526 		/* Sum-Score of HSP, multiplied by the appropriate Lambda. */
1527 	Nlm_FloatHi	xsum[BLAST_NUMBER_OF_ORDERING_METHODS];
1528 	Int4 changed;
1529 	} BLAST_HSP_LINK, PNTR BLAST_HSP_LINKPtr;
1530 /*
1531 	BLAST_NUMBER_OF_ORDERING_METHODS tells how many methods are used
1532 	to "order" the HSP's.
1533 */
1534 
1535 typedef struct _blast_hsp {
1536 		struct _blast_hsp PNTR next, /* the next HSP */
1537 				  PNTR prev; /* the previous one. */
1538 		BLAST_HSP_LINK	hsp_link;
1539 /* Is this HSp part of a linked set? */
1540 		Boolean		linked_set;
1541 /* which method (max or no max for gaps) was used? */
1542 		Int2		ordering_method;
1543 /* how many HSP's make up this (sum) segment */
1544 		Int4		num;
1545 /* normalized score of a set of "linked" HSP's */
1546         Nlm_FloatHi xsum;
1547 		/* If TRUE this HSP starts a chain along the "link" pointer. */
1548 		Boolean 	start_of_chain;
1549 		BLAST_Score	score;
1550 		Int4    	num_ident;
1551 		Nlm_FloatHi	evalue;
1552 		BLAST_Seg query,	/* query sequence info. */
1553 			subject;	/* subject sequence info. */
1554 		Int2		context;	/* Context number of query */
1555                 GapXEditBlockPtr gap_info; /* ALL gapped alignment is here */
1556 		Int4 num_ref;
1557 		Int4 linked_to;
1558 /*which method if any was used for compositional adjustment?
1559   relevant only for blastp*/
1560 		Int2		comp_adjustment_method;
1561 	} BLAST_HSP, PNTR BLAST_HSPPtr;
1562 
/* The helper array contains the info used frequently in the inner for loops. -cfj
 * One array of helpers will be allocated for each thread. See comments preceding
 * link_hsps in blast.c for more info.
 */
1567 
1568 typedef struct link_help_struct{
1569   BLAST_HSPPtr ptr;
1570   Int4 q_off_trim;
1571   Int4 s_off_trim;
1572   Int4 sum[BLAST_NUMBER_OF_ORDERING_METHODS];
1573   Int4 maxsum1;
1574   Int4 next_larger;
1575 } LinkHelpStruct;
1576 
1577 /* Orders information for HSP accesses. */
1578 typedef struct hsp_helper{
1579 	Int4 	qoffset,
1580 		qend;
1581 } BLAST_HSP_helper, PNTR BLAST_HSP_helperPtr;
1582 
1583 
1584 typedef struct _exact_match {
1585    Int4 q_off;
1586    Int4 s_off;
1587 } MegaBlastExactMatch, PNTR MegaBlastExactMatchPtr;
1588 
1589 typedef struct _blast_hitlist {
1590 	struct _blast_hitlist	PNTR next;
1591 	BLAST_HSPPtr PNTR	hsp_array; /* head of linked list of HSPs */
1592 	Int4		hspmax, /* max no. of HSPs allowed per hit list */
1593 			hspcnt, /* no. of HSPs in hit list */
1594 			hspcnt_max; /* no. of HSPs in hitlist, before reaping */
1595 	Boolean		further_process; /* This sequence has been found interesting,
1596 					    it should be further processed by a gapped
1597 					    alignment etc. */
1598 	Boolean		do_not_reallocate; /* Don't reallocate the HSP's, probably because
1599                        		   there is no more memory for this. */
1600         	/* added -cfj */
1601         LinkHelpStruct *lh_helper;
1602         Int4 lh_helper_size;
1603         MegaBlastExactMatchPtr exact_match_array; /* Array to hold initial
1604                                                           exact match hits */
1605         Int4 exact_match_max;
1606 	} BLAST_HitList, PNTR BLAST_HitListPtr;
1607 
1608 /*
1609 	The next two structures are the final output produced by BLAST.  Formatters should then
1610 	convert the data into SeqAligns or the BLAST ASN.1 spec.
1611 */
1612 
1613 typedef struct _blast_results_hsp {
1614 		Int2		ordering_method;/* determines whether large or small gap was used. */
1615 		Int4 		number;	/* number of HSP's used to calculate the p-value. */
1616 		BLAST_Score	score;	/* score of this HSP. */
1617 		Nlm_FloatHi	e_value,/* expect value of this set of HSP's. */
1618 				bit_score; /* above score * lambda/ln2 */
1619 		Int4		num_ident;/* number of identities in this HSP. */
1620 		Int2		context;	/* context number of query. */
1621 		Int2		query_frame, /* frame of query, non-zero if transl. */
1622 				subject_frame; /* frame of subject, non-zero if transl. */
1623 		Int4 		query_offset,	/* Start of the query HSP. */
1624 				query_length,	/* Length of the query HSP. */
1625 				subject_offset,	/* Start of the subject HSP. */
1626 				subject_length, /* Length of the subject HSP.*/
1627 				hspset_cnt;	/* which set of HSP's? */
1628 	/* Starting points (on original HSP) for a gapped extension with X dropoff. */
1629 		Int4		query_gapped_start,
1630 				subject_gapped_start;
1631 
1632                 GapXEditBlockPtr gap_info; /* ALL gapped alignment is here */
1633 		struct _blast_result_hitlist PNTR point_back;
1634 		struct _blast_heap_struct PNTR back_left, PNTR back_right;
1635 		} BLASTResultHsp, PNTR BLASTResultHspPtr;
1636 
1637 /*
1638 	The following structure contains the subject info, if the readdb
1639 	facility is not being used.  Then the subject information is
1640 	kept here.  Otherwise this structure is NULL.
1641 */
1642 typedef struct _blast_subject_info {
1643 		SeqIdPtr sip;	/* ID of the subject. */
1644 		CharPtr defline; /* Defline of the subject. */
1645 		Int4 length; 	/* untranslated length of the database sequence. */
1646 		} BLASTSubjectInfo, PNTR BLASTSubjectInfoPtr;
1647 
1648 typedef struct _blast_result_hitlist {
1649 		BLASTResultHspPtr hsp_array;	/* An array holding the HSP's. */
1650 		Nlm_FloatHi	best_evalue;	/* best evalue in all the HSP's. */
1651 		Int4	high_score; 	/* HSP with highest score. */
1652 		Int4	hspcnt,		/* Number of HSP's. */
1653 			subject_id;	/* ID of the subject. */
1654 		Int2    db_id;          /* ID (0,1,2...) of the db if multiple db's searched. */
1655 		Int4    subject_length; /* length of the database sequence. */
1656 		BLASTSubjectInfoPtr subject_info; /* Subject info if the readdb facility is not being used. */
1657 		SeqAlignPtr seqalign; /* alignment, if this a gapped calculation. */
1658 		Int4 num_ref;
1659 		} BLASTResultHitlist, PNTR BLASTResultHitlistPtr;
1660 
1661 
1662 typedef struct _blast_heap_struct {
1663   Int4 cutvalue;	/* start of a region? */
1664   BLASTResultHspPtr PNTR heap;
1665   Int4 num_in_heap;	/* Number in 'heap' */
1666   Int4 num_of_ref;
1667   struct _blast_heap_struct PNTR next, PNTR prev;
1668 } BLASTHeapStruct, PNTR BLASTHeapPtr;
1669 
1670 /*
1671 	Holds the results already saved.
1672 */
1673 
1674 typedef struct _blast_results_struct {
1675 
1676 		BLASTResultHitlistPtr PNTR results;
1677 		Int4	hitlist_count,	/* Number of hitlists saved on results array already. */
1678 			hitlist_max, 	/* Length of results array. */
1679 			max_pieces;	/* For range-dependent limits. */
1680 		BLASTResultHspPtr **heap;
1681         	Int4 *num_in_heap;
1682 		BLASTHeapPtr heap_ptr;
1683 		} BLASTResultsStruct, PNTR BLASTResultsStructPtr;
1684 
1685 /*
1686 	Holds the data for all possible words that might be used by BLAST.
1687 */
1688 
1689 typedef struct _blast_all_words {
1690 		Uint1Ptr *array,	/* All the possible words */
1691 			 array_storage; /* Storage for the words in array. */
1692 		Int4 	num_of_cols,
1693 			wordsize;
1694 		Boolean rows_allocated,	/* are the rows (of length the wordsize) alloc.*/
1695 			specific;	/* specific (limited) words are to be indexed. */
1696 	} BlastAllWord, *BlastAllWordPtr;
1697 
1698 /*
1699 	Contains gi and ordinal number for use by random access BLAST.
1700 */
1701 typedef struct _double_int4 {
1702         Int4    gi,
1703                 ordinal_id,
1704 		start;
1705 } BlastDoubleInt4, *BlastDoubleInt4Ptr;
1706 
1707 
1708 typedef struct _blast_gi_list {
1709     BlastDoubleInt4Ptr gi_list;	/* List of gi's. */
1710     BlastDoubleInt4Ptr *gi_list_pointer;	/* Pointer to above list. */
1711     Int4 current;	       /* Current position in gi list. */
1712     Int4 total;		       /* total number of gi's. */
1713     Boolean gilist_not_owned; /* do not delete gilist at end. */
1714 } BlastGiList, *BlastGiListPtr;
1715 
1716 /*
1717 	used for keeping start and stop of hits to query, for ALU filtering.
1718 */
1719 typedef struct _blast_hit_range {
1720 	BlastDoubleInt4Ptr      range_list;        /* ranges. */
1721         BlastDoubleInt4Ptr      *range_list_pointer;       /* Pointer to above list. */
1722 	Int4		current,	/* current position in list. */
1723 			total;		/* total number in list. */
1724 	SeqIdPtr	query_id;	/* ID to be put on SeqLoc's that are produced. */
1725 	Int4		base_offset;	/* used if a SeqLoc is searched and it does not start at begining
1726 					of sequence. */
1727 	} BlastHitRange, *BlastHitRangePtr;
1728 
1729 /*
1730 	Contains BLAST error messages.
1731 */
1732 
1733 typedef struct _blast_error_msg {
1734     Uint2 level;/* corresponds to levels of ErrPostEx [none(0), info(1), warn(2), error(3) and fatal(4)] */
1735     CharPtr msg;
1736 } BlastErrorMsg, *BlastErrorMsgPtr;
1737 
1738 /*
1739   Holds data for each "context" (which is generally equal to
1740   one frame of the query).  blastx would have six contexts,
1741   blastp would have one.
1742   */
1743 
1744 typedef struct _blast_context_structure {
1745     Boolean query_allocated;/* The BlastSequenceBlkPtr IS allocated. */
1746     BlastSequenceBlkPtr query;  /* query sequence. */
1747     BLAST_ExtendWordPtr ewp;/* keep track of diagonal etc. for each frame */
1748     ValNodePtr location;    /* Where to start/stop masking. */
1749 } BLASTContextStruct, PNTR BLASTContextStructPtr;
1750 
1751     /* Structure used for full Smith-Waterman results. */
1752 
1753 typedef struct SWResults {
1754     Uint1Ptr seq;
1755     Int4 seqStart;
1756     Int4 seqEnd;
1757     Int4 queryStart;
1758     Int4 queryEnd;
1759     Int4 *reverseAlignScript;
1760     BLAST_Score score;
1761     BLAST_Score scoreThisAlign;
1762     Nlm_FloatHi eValue;
1763     Nlm_FloatHi eValueThisAlign;
1764     Nlm_FloatHi Lambda;
1765     Nlm_FloatHi logK;
1766     SeqIdPtr subject_id;  /*used to display the sequence in alignment*/
1767     struct SWResults *next;
1768     Boolean isFirstAlignment;
1769     Int4 subject_index;  /* needed to break ties on rare occasions */
1770     SeqAlignPtr seqAlign; /*needed when seqAlign is already computed*/
1771 } SWResults;
1772 
/* Average sizes of protein (AA) and nucleotide (NT) sequences.
   NOTE(review): units are presumably residues/bases -- confirm. */
#define BLAST_AA_AVGLEN 300
#define BLAST_NT_AVGLEN 1000

/* How many ticks should be emitted in total. */
#define BLAST_NTICKS 50

/* Period of sending out a star/message.
   NOTE(review): the unit is not stated here (presumably seconds) -- confirm. */
#define STAR_MSG_PERIOD 60
1782 
1783 typedef struct _BlastThrInfo {
1784 
1785     TNlmMutex db_mutex;  /*lock for access to database*/
1786     TNlmMutex results_mutex; /*lock for storing results */
1787     TNlmMutex callback_mutex; /*lock for issuing update ticks on the screen*/
1788     /* Mutex for recalculation of ambiguities, in BlastReevaluateWithAmbiguities */
1789     TNlmMutex ambiguities_mutex;
1790 
1791     /*
1792       GI List to be used if database will be searched by GI.
1793       current is the current element in the array being worked on.
1794       global_gi_being_used specifies that it will be used.
1795       */
1796     Int4 gi_current;
1797     BlastGiListPtr blast_gi_list;
1798 
1799     /* Number of database sequences for each thread to process. */
1800     Int4 db_chunk_size;
1801 
1802     /* The last db sequence to be assigned.  Used only in get_db_chunk after
1803        the acquisition of the "db_mutex" (above). */
1804     Int4 db_chunk_last;
1805 
1806     /* the last sequence in the database to be compared against. */
1807     Int4 final_db_seq;
1808     Int4 number_seqs_done;  /*number of sequences already tested*/
1809     Int4 db_incr;  /*size of a database chunk to get*/
1810     Int4 last_db_seq;
1811 
1812     /* How many positive hits were found (set by ReapHitlist, read by tick_proc
1813        and star_proc). */
1814     Int4 number_of_pos_hits;
1815 
1816     /* Use by star_proc to determine whether to emit a star. */
1817     time_t last_tick;
1818 
1819     /* tells star_proc to check that a star should be emitted. */
1820     TNlmThread awake_thr;
1821     Boolean awake;
1822 
1823     /* tells index_proc to check that a message should be emitted. */
1824     TNlmThread index_thr;
1825     Boolean awake_index;
1826 
1827     /*
1828       Callback functions to indicate progress, or lack thereof.
1829       */
1830     int (LIBCALLBACK *tick_callback)PROTO((Int4 done, Int4 positives));
1831     int (LIBCALLBACK *star_callback)PROTO((Int4 done, Int4 positives));
1832     int (LIBCALLBACK *index_callback)PROTO((Int4 done, Int4 positives));
1833 
1834     /* whether real databases are done */
1835     Boolean	realdb_done;
1836 
1837 } BlastThrInfo, PNTR BlastThrInfoPtr;
1838 
1839 /*
1840 	Structure used for matrix rescaling.
1841 */
1842 
1843 typedef struct _blast_matrix_rescale {
1844 	Int4 		alphabet_size,
1845 			query_length;	/* length of query. */
1846 	Uint1Ptr	query;
1847 	Nlm_FloatHi 	*standardProb;
1848 	Int4Ptr  	*matrix;
1849 	Int4Ptr  	*private_matrix;
1850 	BLAST_KarlinBlkPtr 	*kbp_std,
1851 				*kbp_psi,
1852 				*kbp_gap_std,
1853 				*kbp_gap_psi;
1854 	Nlm_FloatHi	lambda_ideal,
1855                 	K_ideal;
1856 } BlastMatrixRescale, *BlastMatrixRescalePtr;
1857 
1858 
1859 /*
1860 	The central structure for the BLAST search.  This structure
1861 	should contain data (or pointers to data) for all the
1862 	information in a BLAST search.
1863 */
1864 
1865 
/* Bit flags recorded in the "allocated" member of BlastSearchBlk to mark
   which of its sub-structures are allocated.  Each value is a distinct
   power of two, so flags can be combined and tested independently. */
#define BLAST_SEARCH_ALLOC_QUERY 1
#define BLAST_SEARCH_ALLOC_SUBJECT 2
#define BLAST_SEARCH_ALLOC_PBP 4
#define BLAST_SEARCH_ALLOC_SBP 8
#define BLAST_SEARCH_ALLOC_WFP_FIRST 16
#define BLAST_SEARCH_ALLOC_WFP_SECOND 32
#define BLAST_SEARCH_ALLOC_EWPPARAMS 64
#define BLAST_SEARCH_ALLOC_CONTEXT 128
#define BLAST_SEARCH_ALLOC_RESULTS 256
#define BLAST_SEARCH_ALLOC_READDB 512
#define BLAST_SEARCH_ALLOC_TRANS_INFO 1024
#define BLAST_SEARCH_ALLOC_ALL_WORDS 2048
#define BLAST_SEARCH_ALLOC_QUERY_SLP 4096
#define BLAST_SEARCH_ALLOC_THRINFO 8192
#define BLAST_SEARCH_ALLOC_MASK1 16384
1881 
typedef struct blast_search_block {
    Int4 allocated;
/* bit fields specify which structures from below are allocated.  If
a field is allocated, then its bit is non-zero.

		structure     		bit-field (define)
		-----------------------------------------
		query			BLAST_SEARCH_ALLOC_QUERY
		subject			BLAST_SEARCH_ALLOC_SUBJECT
		pbp			BLAST_SEARCH_ALLOC_PBP
		sbp			BLAST_SEARCH_ALLOC_SBP
		wfp_first       	BLAST_SEARCH_ALLOC_WFP_FIRST
		wfp_second      	BLAST_SEARCH_ALLOC_WFP_SECOND
		ewp_params		BLAST_SEARCH_ALLOC_EWPPARAMS
		context			BLAST_SEARCH_ALLOC_CONTEXT
		result_struct		BLAST_SEARCH_ALLOC_RESULTS
		rdfp	        	BLAST_SEARCH_ALLOC_READDB
		translation_table       BLAST_SEARCH_ALLOC_TRANS_INFO
		translation_table_rc
		all_words		BLAST_SEARCH_ALLOC_ALL_WORDS
		query_slp		BLAST_SEARCH_ALLOC_QUERY_SLP
		mask1			BLAST_SEARCH_ALLOC_MASK1

   NOTE(review): BLAST_SEARCH_ALLOC_THRINFO is also defined but was missing
   from this table; presumably it covers thr_info -- confirm.
*/

/*
  Specifies whether the search is position based or not.
  */
    Boolean positionBased;
    Boolean posConverged;
    /*
      Specifies that the query sequence was invalid (e.g., XXXXXXXXXXXXXXXXXXXXXX).
      */
    Boolean query_invalid;
    /* Specifies that the search timed out (i.e., cpu time limit was reached). */
    Boolean timed_out;
    /*
      The BLASTContextStructPtr is an array and each element contains
      information about the query sequence and the frame number.
      If there are six frames (e.g., blastx) then the BLASTContextStructPtr
      is six elements long; if there's one frame (e.g., blastp) then
      BLASTContextStructPtr is one element long.

      number_of_contexts states how long the context array is.
      NOTE(review): no number_of_contexts member is visible in this struct;
      first_context and last_context below presumably bound the array --
      confirm.
      */
    BLASTContextStructPtr context;
    Int2	first_context,
        last_context;
    /*
       The GapAlignBlkPtr used by ALIGN (in gapxdrop.c) for gapped alignments.
       */

    GapAlignBlkPtr gap_align;

    /*
      All the possible words.
      */
    BlastAllWordPtr all_words;
    /*
        Set the context_factor, which specifies how many different
        ways the query or db is examined (e.g., blastn looks at both
        strands of query, context_factor is 2).
        */
    Int2 context_factor;

    /*
      What type of search (e.g., blastp, blastx, etc.)?
      */
    CharPtr prog_name;
    Uint1 prog_number;
    /*
      translation_table and translation_table_rc hold the translation
      from ncbi2na to ncbistdaa for normal and reverse-complement
      translations.  Only used and initialized with tblast[nx].
      Initialized by GetPrivatTranslationTable
      */
    Uint1Ptr translation_table,
        translation_table_rc;

    /*
      ValNodePtr containing error messages.
      */
    ValNodePtr error_return;

    /*
      ValNodePtr containing masking SeqLocPtr's
      */
    ValNodePtr mask;
    ValNodePtr mask1;
    /*
      What genetic codes are we using to translate the query or database
      when needed.  Based upon NCBI genetic codes.
      */
    CharPtr genetic_code,		/* genetic code used for query. */
        db_genetic_code;	/* genetic code used for database. */

    /*
	The BlastSequenceBlk "subject" holds info about the subject.
	Info about the original sequence is in original_seq.  This will
	be NULL if the sequence was not translated.
        */
    Uint1Ptr translation_buffer;	/* Buffer for (tblast[nx]) db translations*/
    Int4 translation_buffer_size;	/* size of translation_buffer. */
    CharPtr original_seq;	/* Original (i.e.,  untransl.) sequence. */
    BlastSequenceBlkPtr	subject;/* subject sequence. */


    /* KM-- info about individual queries from a concatenated query in
       blastn or tblastn */
    struct queries PNTR mult_queries;	/* struct defined in blastconcat.h */


    /*
      SeqLocPtr for the query, owned by the caller and not by BLAST.
      */
    SeqLocPtr query_slp;

    /* Id's for the query and subject. */
    SeqIdPtr		query_id;	/* ID for the query, any form. */
    Int4			subject_id;	/* the number of the subject, in the DB. */
    BLAST_ParameterBlkPtr pbp;	/* options selected. */
    BLAST_ScoreBlkPtr sbp;		/* info on scoring. */
    BLAST_ExtendWordParamsPtr ewp_params; /* parameters for extensions.*/

    /* 	For the two-pass method two BLAST_WordFinderPtr's are required.
	The actual wfp's are in wfp_first and wfp_second.  "wfp" is just
	a pointer to one of those two.  Whether they have been allocated
	(at all) is signified by setting the bit-fields above.
        */
    BLAST_WordFinderPtr     wfp, 	/* find initial words. */
        wfp_first, /* words for first pass. */
        wfp_second;/* words for second pass. */
    /*	For the two-pass this should be set to TRUE on the first (preliminary)
	pass and FALSE on the second pass.
        */
    Boolean			prelim;
/*
  The "current" hit, that is the one being worked on right now.
  If a hitlist is deemed significant, then "current_hitlist" is
  moved to "seqalign".  current_hitlist_purge specifies
  whether the hitlist should be purged after each call to a
  WordFinder; it will generally be purged except for non-initial
  frames of tblast[nx].
  */
    Boolean			current_hitlist_purge;
    BLAST_HitListPtr	current_hitlist;

    BlastSequenceBlkPtr	PNTR query_dnap; /* query DNAP sequence. */

    /*
      The worst evalue seen by this thread so far.
      Only filled in if the hitlist is already full, otherwise
      it should be DBL_MAX.
      */
    Nlm_FloatHi	worst_evalue;
    /*
      Size of the HSP array on the "current_hitlist"
      */
    Int4 hsp_array_size;
    /*
      Contains hits that are significant.
      */
    Int4			result_size;
    BLASTResultsStructPtr	result_struct;

    Int8			dblen;	/* total length of the database. */
    Int8                    dblen_eff;      /* effective length of the database. */
    Int8                    dblen_eff_real;      /* effective length of the database.  NOTE(review): same description as dblen_eff; the distinction is not documented here -- confirm. */
    Int4                    dbseq_num;      /* number of sequences in the database. */
    Int4                    length_adjustment; /* amount removed from end of query and db sequences. */
    Nlm_FloatHi		searchsp_eff;	/* Effective search space (used for statistics). */
    Int4            rps_qlen; /* original query sequence length (RPS-BLAST only) */
    ReadDBFILEPtr		rdfp, /* I/O PTR for database files. */
        rdfp_list;	/* linked rdfp list of all databases. */
    /* The subject info (id and defline) is kept here for the current sequence
       if the readdb facility is not used.  This structure should only
       be used if rdfp is NULL.
       */
    BLASTSubjectInfoPtr subject_info;

    /* Data used in threads - previously global variables */

    BlastThrInfoPtr thr_info;

    /*
      start and stop of query that must be included for an alignment
      to be counted.  The Boolean whole_query specifies whether these
      are valid (i.e., have been set) or not.
      */
    Boolean whole_query;
    Int4 required_start, required_end;

    /*
      Callback functions to indicate progress, or lack thereof.
      The two declarations below are commented out here; tick_callback and
      star_callback are members of BlastThrInfo.
*/
    /* int (LIBCALLBACK *tick_callback)PROTO((Int4 done, Int4 positives)); */
    /* int (LIBCALLBACK *star_callback)PROTO((Int4 done, Int4 positives)); */
    /*
      Callback function to handle results (e.g., print them out for neighboring)
      in place of BlastSaveCurrentHitlist.
      */
    int (LIBCALLBACK *handle_results)PROTO((VoidPtr search));
    /*
	Output stream to put results to
        */
    VoidPtr		output;
    /*
      These "counters" keep track of how often certain operations
      were performed.

      This counting is performed only if BLAST_COLLECT_STATS is defined.
      */
    Int8	first_pass_hits,	/* no. of hits on 1st pass. */
        second_pass_hits,	/* no. of hits on 2nd pass. */
        second_pass_trys,	/* no. of seqs that made it to 2nd pass. */
        first_pass_extends,	/* no. extended on 1st pass. */
        second_pass_extends,	/* no. extended on 2nd pass. */
        first_pass_good_extends,/* no. successfully extended on 1st pass. */
        second_pass_good_extends,/* no. successfully extended on 2nd pass. */
        number_of_seqs_better_E,/* how many sequences were better than E. */
        prelim_gap_no_contest,	/* No. of HSP's under E=10 alone. */
        prelim_gap_passed,	/* No. of HSP's that passed prelim gapping. */
        prelim_gap_attempts,	/* No. of HSP's we attempted to gap. */
        real_gap_number_of_hsps, /* How many HSP's were gapped in BlastGetGappedScore. */
        semid;                  /* ID of the load-balance semaphore is stored here */
    GreedyAlignMemPtr abmp; /* Memory for megablast greedy extension */
    Int4 PNTR query_context_offsets; /* offsets for all queries and strands in a
                                        concatenated sequence */
    SeqIdPtr PNTR qid_array; /* Ids of all queries in Mega BLAST search */
    BLASTResultsStructPtr PNTR mb_result_struct; /* one result struct per query
                                                    for Mega BLAST */
    ValNodePtr mb_endpoint_results; /* Points to linked list of results  */
} BlastSearchBlk, PNTR BlastSearchBlkPtr;
2114 
2115 typedef struct _blast_hsp_segment {
2116    Int4 q_start, q_end;
2117    Int4 s_start, s_end;
2118    struct _blast_hsp_segment PNTR next;
2119 } BLASTHSPSegment, PNTR BLASTHSPSegmentPtr;
2120 
2121 #ifdef __cplusplus
2122 }
2123 #endif
2124 #endif /* !__BLASTSTR__ */
2125