1 /* ===========================================================================
2 *
3 *                            PUBLIC DOMAIN NOTICE
4 *               National Center for Biotechnology Information
5 *
6 *  This software/database is a "United States Government Work" under the
7 *  terms of the United States Copyright Act.  It was written as part of
8 *  the author's official duties as a United States Government employee and
9 *  thus cannot be copyrighted.  This software/database is freely available
10 *  to the public for use. The National Library of Medicine and the U.S.
11 *  Government have not placed any restriction on its use or reproduction.
12 *
13 *  Although all reasonable efforts have been taken to ensure the accuracy
14 *  and reliability of the software and data, the NLM and the U.S.
15 *  Government do not and cannot warrant the performance or results that
16 *  may be obtained by using this software or data. The NLM and the U.S.
17 *  Government disclaim all warranties, express or implied, including
18 *  warranties of performance, merchantability or fitness for any particular
19 *  purpose.
20 *
21 *  Please cite the author in any work or product based on this material.
22 *
23 * ===========================================================================*/
24 /*****************************************************************************
25 
26 File name: blastkar.h
27 
28 Author: Tom Madden
29 
30 Contents: definitions and prototypes used by blastkar.c to calculate BLAST
31 	statistics.
32 
33 ******************************************************************************/
34 
35 /* $Revision: 6.41 $
36 * $Log: blastkar.h,v $
37 * Revision 6.41  2005/10/06 12:49:39  madden
38 * Add prototypes for BlastKarlinGetNuclAlphaBeta and BlastKarlinBlkNuclGappedCalc
39 *
40 * Revision 6.40  2005/07/27 17:48:57  coulouri
41 * remove hardcoded paths
42 *
43 * Revision 6.39  2004/09/28 16:04:19  papadopo
44 * From Michael Gertz:
45 * 1. Pass the effective database size into BlastSmallGapSumE,
46 *     BlastLargeGapSumE and BlastUnevenGapSumE.  The routines use this
47 *     value in a simplified formula to compute the e-value of singleton sets.
48 * 2. Caused all routines for calculating the significance of multiple
49 *     distinct alignments (BlastSmallGapSumE, BlastLargeGapSumE and
50 *     BlastUnevenGapSumE) to use
51 *
52 *        sum_{i in linked_set} (\lambda_i s_i - \ln K_i)
53 *
54 *     as the weighted sum score, where (\lambda_i, K_i) are taken from
55 *     the appropriate query context.
56 *
57 * Revision 6.38  2004/04/28 14:36:34  madden
58 * Changes from Mike Gertz:
59 *  - Added a function prototype for BlastGapDecayDivisor
60 *   - Removed the function prototypes for  BlastGapDecay and BlastGapDecayInverse
61 *   - Modified function prototypes for BlastSmallGapSumE, BlastLargeGapSumE and
62 *   BlastUnevenGapSumE
63 *
64 * Revision 6.37  2004/04/23 13:20:25  madden
65 * Change: BLAST_KARLIN_K_SUMLIMIT_DEFAULT from 0.01 to 0.00001
66 *
67 * Revision 6.36  2004/03/31 18:39:24  kans
68 * fixed typo in BlastBlastComputeLengthAdjustment
69 *
70 * Revision 6.35  2004/03/31 17:58:51  papadopo
71 * Mike Gertz' changes for length adjustment calculations
72 *
73 * Revision 6.34  2003/12/12 16:00:46  madden
74 * Add gap_decay_rate to BlastCutoffs, remove BlastCutoffs_simple, removal of defunct _real variables (all from Mike Gertz)
75 *
76 * Revision 6.33  2003/11/28 22:39:41  camacho
77 * +static keyword to BlastKarlinLtoH
78 *
79 * Revision 6.32  2003/11/26 19:09:09  madden
80 * Remove ref to BlastKarlinLambdaBis (no longer needed per Mike Gertz)
81 *
82 * Revision 6.31  2003/02/27 19:07:56  madden
83 * Add functions PrintMatrixMessage and PrintAllowedValuesMessage
84 *
85 * Revision 6.30  2003/02/26 18:23:50  madden
86 * Add functions BlastKarlinkGapBlkFill and BlastKarlinReportAllowedValues, call from BlastKarlinBlkGappedCalcEx
87 *
88 * Revision 6.29  2002/12/04 13:28:37  madden
89 * Add effective length parameters
90 *
91 * Revision 6.28  2002/09/03 14:21:50  camacho
92 * Changed type of karlinK from FloatHi to Nlm_FloatHi
93 *
94 * Revision 6.27  2002/08/29 13:58:08  camacho
95 * Added field to store K parameter associated with posMatrix
96 *
97 * Revision 6.26  2002/03/18 21:31:56  madden
98 * Added comments
99 *
100 * Revision 6.25  2000/12/28 16:23:24  madden
101 * Function getAlphaBeta from AS
102 *
103 * Revision 6.24  2000/12/26 17:46:20  madden
104 * Add function BlastKarlinGetMatrixValuesEx2 to return alpha and beta
105 *
106 * Revision 6.23  2000/11/24 22:07:33  shavirin
107 * Added new function BlastResFreqFree().
108 *
109 * Revision 6.22  2000/09/27 21:27:15  dondosha
110 * Added original_matrix member to BLAST_Matrix structure
111 *
112 * Revision 6.21  2000/09/12 16:03:51  dondosha
113 * Added functions to create and destroy matrix used in txalign
114 *
115 * Revision 6.20  2000/08/31 15:45:07  dondosha
116 * Added function BlastUnevenGapSumE for sum evalue computation with different gap size on two sequences
117 *
118 * Revision 6.19  2000/08/23 18:50:02  madden
119 * Changes to support decline-to-align checking
120 *
121 * Revision 6.18  2000/08/04 15:49:26  sicotte
122 * added BlastScoreBlkMatCreateEx(reward,penalty) and BlastKarlinGetDefaultMatrixValues as external functions
123 *
124 * Revision 6.17  2000/07/07 21:20:08  vakatov
125 * Get all "#include" out of the 'extern "C" { }' scope!
126 *
127 * Revision 6.16  2000/05/26 17:29:55  shavirin
128 * Added array of pos frequencies into BLAST_Matrix structure and it's
129 * handling.
130 *
131 * Revision 6.15  2000/04/17 20:41:37  madden
132 * Added BLAST_MatrixFetch
133 *
134 * Revision 6.14  1999/12/22 21:06:35  shavirin
135 * Added new function BlastPSIMaxScoreGet().
136 *
137 * Revision 6.13  1999/09/16 17:38:42  madden
138 * Add posFreqs for position-specific frequencies
139 *
140 * Revision 6.12  1999/03/17 16:49:11  madden
141 * Removed comment within comment
142 *
143 * Revision 6.11  1998/12/31 18:17:05  madden
144 * Added strand option
145 *
146  * Revision 6.10  1998/09/11 19:02:07  madden
147  * Added paramC
148  *
149  * Revision 6.9  1998/07/17 15:39:58  madden
150  * Changes for Effective search space.
151  *
152  * Revision 6.8  1998/04/24 19:27:55  madden
153  * Added BlastKarlinBlkStandardCalcEx for ideal KarlinBlk
154  *
155  * Revision 6.7  1998/04/10 15:05:49  madden
156  * Added pref_flags return by value to BlastKarlinGetMatrixValues
157  *
158  * Revision 6.6  1998/03/09 17:15:04  madden
159  * Added BlastKarlinGetMatrixValues
160  *
161  * Revision 6.5  1998/02/26 22:34:35  madden
162  * Changes for 16 bit windows
163  *
164  * Revision 6.4  1998/02/06 18:28:07  madden
165  * Added functions to produce pseudo-scores from p and e values
166  *
167  * Revision 6.3  1997/11/14 17:14:57  madden
168  * Added Karlin parameter to matrix structure
169  *
170  * Revision 6.2  1997/11/07 00:48:10  madden
171  * Added defintitions and functions for BLAST_Matrix
172  *
173  * Revision 6.1  1997/09/22 17:36:26  madden
174  * MACROS for position-specific matrices from Andy Neuwald
175  *
176  * Revision 6.0  1997/08/25 18:52:41  madden
177  * Revision changed to 6.0
178  *
179  * Revision 1.16  1997/08/19 18:19:49  madden
180  * BLAST_Score is Int4, not long
181  *
182  * Revision 1.15  1997/07/14 15:31:07  madden
183  * Changed call to BlastKarlinBlkGappedCalc
184  *
185  * Revision 1.14  1997/05/01 15:53:16  madden
186  * Addition of extra KarlinBlk's for psi-blast
187  *
188  * Revision 1.13  1997/01/22  17:46:30  madden
189  * Added BLAST_ScorePtr PNTR posMatrix.
190  *
191  * Revision 1.12  1996/12/16  14:35:48  madden
192  * Removed gapped_calculation Boolean.
193  *
194  * Revision 1.11  1996/12/10  17:30:59  madden
195  * Changed statistics for gapped blastp
196  *
197  * Revision 1.10  1996/12/03  19:13:47  madden
198  * Made BlastRes functions non-static.
199  *
200  * Revision 1.9  1996/11/26  19:55:25  madden
201  * Added check for matrices in standard places.
202  *
203  * Revision 1.8  1996/11/18  14:49:26  madden
204  * Rewrote BlastScoreSetAmbigRes to take multiple ambig. chars.
205  *
206  * Revision 1.7  1996/11/08  21:45:03  madden
207  * Added BLASTNA_SEQ_CODE define.
208  *
209  * Revision 1.6  1996/10/03  20:49:29  madden
210  * Added function BlastKarlinBlkStandardCalc to calculate standard
211  * Karlin parameters for blastx and tblastx.
212  *
213  * Revision 1.5  1996/10/02  21:43:31  madden
214  * Replaced kbp and sfp with arrays of same.
215  *
216  * Revision 1.4  1996/09/30  21:56:12  madden
217  * Replaced query alphabet of ncbi2na with blastna alphabet.
218  *
219  * Revision 1.3  1996/09/10  19:40:35  madden
220  * Added function BlastScoreBlkMatCreate for blastn matrices.
221  *
222  * Revision 1.2  1996/08/22  20:38:11  madden
223  * Changed all doubles and floats to Nlm_FloatHi.
224  *
225  * Revision 1.1  1996/08/05  19:47:42  madden
226  * Initial revision
227  *
228  * Revision 1.19  1996/06/21  15:23:54  madden
229  * Corelibed BlastSumP.
230  *
231  * Revision 1.18  1996/06/21  15:14:55  madden
232  * Made some functions static, added LIBCALL to others.
233  *
234  * Revision 1.17  1996/06/04  15:34:55  madden
235  * *** empty log message ***
236  *
237  * Revision 1.16  1996/05/22  20:38:05  madden
238  * *** empty log message ***
239  *
240  * Revision 1.15  1996/05/20  21:18:51  madden
241  * Added BLASTMatrixStructure.
242  *
243  * Revision 1.14  1996/05/16  19:50:15  madden
244  * Added documentation block.
245  *
246  * Revision 1.13  1996/04/24  12:53:12  madden
247  * *** empty log message ***
248  *
249  * Revision 1.12  1996/04/16  15:34:10  madden
250  * renamed variable in BlastLargeGapSumE.
251  *
252  * Revision 1.11  1996/03/25  16:34:19  madden
253  * Changes to mimic old statistics.
254  *
255  * Revision 1.10  1996/02/15  23:20:35  madden
256  * Added query length to a number of calls.
257  *
258  * Revision 1.9  1996/01/17  23:19:13  madden
259  * Added BlastScoreSetAmbigRes.
260  *
261  * Revision 1.8  1996/01/17  17:01:19  madden
262  * Fixed BlastSmallGapSumE  and BlastLargeGapSumE.
263  *
264  * Revision 1.7  1996/01/17  13:46:55  madden
265  * Added prototype for BlastKarlinPtoE.
266  *
267  * Revision 1.6  1996/01/06  18:58:28  madden
268  * Added prototype for BlastSumP.
269  *
270  * Revision 1.5  1995/12/30  18:39:27  madden
271  * added q_frame and s_frame to KarlinBlkPtr.
272  *
273  * Revision 1.4  1995/12/26  23:05:19  madden
274  * *** empty log message ***
275  *
276  * Revision 1.3  1995/12/26  20:28:10  madden
277  * Replaced BLAST_ScoreMat wiht BLAST_ScorePtr PNTR.
278  *
279  * Revision 1.2  1995/12/26  14:26:22  madden
280  * *** empty log message ***
281  *
282  * Revision 1.1  1995/12/21  23:07:35  madden
283  * Initial revision
284  *
285  *
286  * */
287 #ifndef __BLASTKAR__
288 #define __BLASTKAR__
289 
290 #include <ncbi.h>
291 #include <objalign.h>
292 
293 #ifdef __cplusplus
294 extern "C" {
295 #endif
296 
/*
	Matrix 'preference' levels (as specified by S. Altschul).
	The documentation of BlastKarlinGetMatrixValues in this header names
	these three defines as the values used with its pref_flags field, which
	is used for display purposes.
*/
#define BLAST_MATRIX_NOMINAL   0
#define BLAST_MATRIX_PREFERRED 1
#define BLAST_MATRIX_BEST      2
303 
304 
/****************************************************************************
Default tuning constants for the Karlin-Altschul parameter calculations.

For more accuracy in the calculation of K, set K_SUMLIMIT to 0.00001.
For high speed in the calculation of K, use a K_SUMLIMIT of 0.001.
Note:  statistical significance is often not greatly affected by the value
of K, so high accuracy is generally unwarranted.
*****************************************************************************/

/* Sum limit used in BlastKarlinLHtoK(). */
#define BLAST_KARLIN_K_SUMLIMIT_DEFAULT 0.0001

/* Accuracy to which Lambda should be calculated. */
#define BLAST_KARLIN_LAMBDA_ACCURACY_DEFAULT    (1.e-5)

/* Number of iterations in LambdaBis = ln(accuracy)/ln(2). */
#define BLAST_KARLIN_LAMBDA_ITER_DEFAULT        17

/* Initial guess for the value of Lambda in BlastKarlinLambdaNR. */
#define BLAST_KARLIN_LAMBDA0_DEFAULT    0.5

/* NOTE(review): presumably the iteration cap for the K calculation --
   confirm against blastkar.c. */
#define BLAST_KARLIN_K_ITER_MAX 100

/* Accuracy for SumP calculations. */
#define BLAST_SUMP_EPSILON_DEFAULT 0.002
325 
/*
	blastna is an alphabet that blastn uses; its first four characters
	have the same representation as in ncbi2na.  The tables declared
	after these defines map between ncbi4na and blastna and are sized by
	BLASTNA_SIZE.
*/
#define BLASTNA_SIZE 16
/* Identifies the blastna alphabet, for use in blast only. */
#define BLASTNA_SEQ_CODE 99
/* Alphabet size for PSI-BLAST.
   NOTE(review): the original remark read "For PSI Blast this is the only 26";
   presumably 26 is the only size PSI-BLAST uses -- confirm. */
#define PSI_ALPHABET_SIZE  26
335 
336 extern Uint1 ncbi4na_to_blastna[BLASTNA_SIZE];
337 
338 extern Uint1 blastna_to_ncbi4na[BLASTNA_SIZE];
339 
340 
341 /*************************************************************************
342 	Structure to the Karlin-Blk parameters.
343 
344 	This structure was (more or less) copied from the old
345 	karlin.h.
346 **************************************************************************/
347 
348 typedef struct {
349 		Nlm_FloatHi	Lambda; /* Lambda value used in statistics */
350 		Nlm_FloatHi	K, logK; /* K value used in statistics */
351 		Nlm_FloatHi	H; /* H value used in statistics */
352 		Nlm_FloatHi	paramC;	/* for use in seed. */
353 	} BLAST_KarlinBlk, PNTR BLAST_KarlinBlkPtr;
354 
355 
356 
357 
358 /********************************************************************
359 
360 	Structures relating to scoring or the BLAST_ScoreBlk
361 
362 ********************************************************************/
363 
364 /* BLAST_Score must be a signed datatype */
365 typedef Int4    BLAST_Score, PNTR BLAST_ScorePtr;
366 
/*
Historical note kept from the original header:
SCORE_MIN is (-2**31 + 1)/2 because it has been observed more than once that
a compiler did not properly calculate the quantity (-2**31)/2.  The list
of compilers which failed this operation have at least at some time included:
NeXT and a version of AIX/370's MetaWare High C R2.1r.
For this reason, SCORE_MIN is not simply defined to be LONG_MIN/2.

NOTE(review): the macros below are defined as INT2_MIN / INT2_MAX, which does
not match the (-2**31 + 1)/2 description above; the note appears to describe
an earlier definition -- confirm before relying on it.
*/
#define BLAST_SCORE_MIN	INT2_MIN
#define BLAST_SCORE_MAX	INT2_MAX

/*
NOTE(review): presumably the bounds for an individual score; a narrower range
is selected when OS_DOS or OS_MAC is defined -- confirm intent.
*/
#if defined(OS_DOS) || defined(OS_MAC)
#define BLAST_SCORE_1MIN (-100)
#define BLAST_SCORE_1MAX ( 100)
#else
#define BLAST_SCORE_1MIN (-10000)
#define BLAST_SCORE_1MAX ( 1000)
#endif
/* Width of the interval [BLAST_SCORE_1MIN, BLAST_SCORE_1MAX]. */
#define BLAST_SCORE_RANGE_MAX	(BLAST_SCORE_1MAX - BLAST_SCORE_1MIN)
386 
387 typedef struct _blast_score_freq {
388 		BLAST_Score	score_min, score_max;
389 		BLAST_Score	obs_min, obs_max;
390 		Nlm_FloatHi	score_avg;
391 		Nlm_FloatHi	PNTR sprob0, PNTR sprob;
392 	} BLAST_ScoreFreq, PNTR BLAST_ScoreFreqPtr;
393 
/* Dimension of the score matrix storage: BLASTMatrixStructure holds
   BLAST_MATRIX_SIZE row pointers and BLAST_MATRIX_SIZE squared scores. */
#define BLAST_MATRIX_SIZE 32
395 
396 typedef struct _blast_matrix_struct {
397 		BLAST_ScorePtr matrix[BLAST_MATRIX_SIZE];
398 		BLAST_Score long_matrix[BLAST_MATRIX_SIZE*BLAST_MATRIX_SIZE];
399 	} BLASTMatrixStructure, PNTR BLASTMatrixStructurePtr;
400 
401 typedef struct _blast_scoreblk {
402 	Boolean		protein_alphabet; /* TRUE if alphabet_code is for a
403 protein alphabet (e.g., ncbistdaa etc.), FALSE for nt. alphabets. */
404 	Uint1		alphabet_code;	/* NCBI alphabet code. */
405 	Int2 		alphabet_size;  /* size of alphabet. */
406 	Int2 		alphabet_start;  /* numerical value of 1st letter. */
407 	BLASTMatrixStructurePtr matrix_struct;	/* Holds info about matrix. */
408 	BLAST_ScorePtr PNTR matrix;  /* Substitution matrix */
409 	BLAST_ScorePtr PNTR posMatrix;  /* Sub matrix for position depend BLAST. */
410     Nlm_FloatHi karlinK; /* Karlin-Altschul parameter associated with posMatrix */
411 	Int2		mat_dim1, mat_dim2;	/* dimensions of matrix. */
412 	BLAST_ScorePtr	maxscore; /* Max. score for each letter */
413 	BLAST_Score	loscore, hiscore; /* Min. & max. substitution scores */
414 	BLAST_Score	penalty, reward; /* penalty and reward for blastn. */
415 	Boolean		read_in_matrix; /* If TRUE, matrix is read in, otherwise
416 					produce one from penalty and reward above. */
417 	BLAST_ScoreFreqPtr PNTR sfp;	/* score frequencies. */
418 	Nlm_FloatHi **posFreqs; /*matrix of position specific frequencies*/
419 	/* kbp & kbp_gap are ptrs that should be set to kbp_std, kbp_psi, etc. */
420 	BLAST_KarlinBlkPtr 	PNTR kbp; 	/* Karlin-Altschul parameters. */
421 	BLAST_KarlinBlkPtr 	PNTR kbp_gap; /* K-A parameters for gapped alignments. */
422 	/* Below are the Karlin-Altschul parameters for non-position based ('std')
423 	and position based ('psi') searches. */
424 	BLAST_KarlinBlkPtr 	*kbp_std,
425 				*kbp_psi,
426 				*kbp_gap_std,
427 				*kbp_gap_psi;
428 	BLAST_KarlinBlkPtr 	kbp_ideal;	/* Ideal values (for query with average database composition). */
429 	Int2 number_of_contexts;	/* Used by sfp and kbp, how large are these*/
430 	Int2		matid;		/* id number of matrix. */
431 	CharPtr 	name;		/* name of matrix. */
432 	Uint1Ptr 	ambiguous_res;	/* Array of ambiguous res. (e.g, 'X', 'N')*/
433 	Int2		ambig_size,	/* size of array above. */
434 			ambig_occupy;	/* How many occupied? */
435 	ValNodePtr	comments;	/* Comments about matrix. */
436 /**** Andy's modification ****/
437 	Int4    	query_length;   /* the length of the query. */
438 /**** end Andy's modification ****/
439 	Int4	length_adjustment; /* length to trim query/db sequence by. */
440 	Int4	effective_query_length; /* shortened query length. */
441 	Int8	effective_db_length;	/* trimmed db length */
442 	Int8	effective_search_sp;	/* product of above two */
443         Boolean round_down; /* round down to next even score if the score is odd. */
444 	} BLAST_ScoreBlk, PNTR BLAST_ScoreBlkPtr;
445 
446 /* Used for communicating between BLAST and other applications. */
447 typedef struct _blast_matrix {
448 		Boolean is_prot;	/* Matrix is for proteins */
449 		CharPtr name;		/* Name of Matrix (i.e., BLOSUM62). */
450 		/* Position-specific BLAST rows and columns are different, otherwise they are the
451 		alphabet length. */
452 		Int4	rows,		/* query length + 1 for PSSM. */
453 			columns;	/* alphabet size in all cases (26). */
454 		Int4Ptr PNTR matrix;
455                 Nlm_FloatHi ** posFreqs;
456 		Nlm_FloatHi karlinK;
457                 Int4Ptr PNTR original_matrix;
458 } BLAST_Matrix, PNTR BLAST_MatrixPtr;
459 
460 typedef struct _blast_rescomp {
461                 Uint1	alphabet_code;
462                 Int4Ptr	comp, 	/* composition of alphabet, array starts at beginning of alphabet. */
463 			comp0;	/* Same array as above, starts at zero. */
464         } BLAST_ResComp, PNTR BLAST_ResCompPtr;
465 
466 typedef struct _blast_resfreq {
467 		Uint1		alphabet_code;
468                 Nlm_FloatHi PNTR prob;	/* probs, (possible) non-zero offset. */
469 		Nlm_FloatHi PNTR prob0; /* probs, zero offset. */
470         } BLAST_ResFreq, PNTR BLAST_ResFreqPtr;
471 
472 BLAST_ScoreBlkPtr LIBCALL BLAST_ScoreBlkNew PROTO((Uint1 alphabet, Int2 number_of_contexts));
473 
474 BLAST_ScoreBlkPtr LIBCALL BLAST_ScoreBlkDestruct PROTO((BLAST_ScoreBlkPtr sbp));
475 
476 Int2 LIBCALL BlastScoreSetAmbigRes PROTO((BLAST_ScoreBlkPtr sbp, Char ambiguous_res));
477 
478 
479 Int2 LIBCALL BlastScoreBlkFill PROTO((BLAST_ScoreBlkPtr sbp, CharPtr string, Int4 length, Int2 context_number));
480 
481 Int2 LIBCALL BlastScoreBlkMatFill PROTO((BLAST_ScoreBlkPtr sbp, CharPtr matrix));
482 BLAST_ScorePtr PNTR LIBCALL BlastScoreBlkMatCreateEx(BLAST_ScorePtr PNTR matrix,BLAST_Score penalty, BLAST_Score reward);
483 
484 Int2 LIBCALL BlastScoreBlkMatRead PROTO((BLAST_ScoreBlkPtr sbp, FILE *fp));
485 
486 Int2 LIBCALL BlastScoreBlkMaxScoreSet PROTO((BLAST_ScoreBlkPtr sbp));
487 BLAST_ScorePtr BlastPSIMaxScoreGet(BLAST_ScorePtr PNTR posMatrix,
488                                    Int4 start, Int4 length);
489 
490 BLAST_ResCompPtr LIBCALL BlastResCompNew PROTO((BLAST_ScoreBlkPtr sbp));
491 
492 BLAST_ResCompPtr LIBCALL BlastResCompDestruct PROTO((BLAST_ResCompPtr rcp));
493 
494 Int2 LIBCALL BlastResCompStr PROTO((BLAST_ScoreBlkPtr sbp, BLAST_ResCompPtr rcp, CharPtr str, Int4 length));
495 
496 /*
497 Produces a Karlin Block, and parameters, with standard protein frequencies.
498 */
499 Int2 LIBCALL BlastKarlinBlkStandardCalc PROTO((BLAST_ScoreBlkPtr sbp, Int2 context_start, Int2 context_end));
500 BLAST_KarlinBlkPtr LIBCALL BlastKarlinBlkStandardCalcEx PROTO((BLAST_ScoreBlkPtr sbp));
501 
502 
503 
504 /*
505 	Functions taken from the OLD karlin.c
506 */
507 
508 BLAST_KarlinBlkPtr LIBCALL BlastKarlinBlkCreate PROTO((void));
509 
510 BLAST_KarlinBlkPtr LIBCALL BlastKarlinBlkDestruct PROTO((BLAST_KarlinBlkPtr));
511 
512 Int2 LIBCALL BlastKarlinBlkCalc PROTO((BLAST_KarlinBlkPtr kbp, BLAST_ScoreFreqPtr sfp));
513 
514 Int2 LIBCALL BlastKarlinBlkGappedCalc PROTO((BLAST_KarlinBlkPtr kbp, Int4 gap_open, Int4 gap_extend, CharPtr matrix_name, ValNodePtr PNTR error_return));
515 
516 Int2 LIBCALL BlastKarlinBlkGappedCalcEx PROTO((BLAST_KarlinBlkPtr kbp, Int4 gap_open, Int4 gap_extend, Int4 decline_align, CharPtr matrix_name, ValNodePtr PNTR error_return));
517 
518 
519 /*
520         Attempts to fill KarlinBlk for given gap opening, extensions etc.
521         Will return non-zero status if that fails.
522 
523         return values:  -1 if matrix_name is NULL;
524                         1 if matrix not found
525                         2 if matrix found, but open, extend etc. values not supported.
526 */
527 Int2 LIBCALL BlastKarlinkGapBlkFill(BLAST_KarlinBlkPtr kbp, Int4 gap_open, Int4 gap_extend, Int4 decline_align, CharPtr matrix_name);
528 
529 /* Prints a messages about the allowed matrices, BlastKarlinkGapBlkFill should return 1 before this is called. */
530 CharPtr PrintMatrixMessage(const Char *matrix);
531 
532 /* Prints a messages about the allowed open etc values for the given matrix,
533 BlastKarlinkGapBlkFill should return 2 before this is called. */
534 CharPtr PrintAllowedValuesMessage(const Char *matrix, Int4 gap_open, Int4 gap_extend, Int4 decline_align);
535 
536 Int2 LIBCALL BlastKarlinReportAllowedValues(const Char *matrix_name, ValNodePtr PNTR error_return);
537 
538 
539 Nlm_FloatHi BlastKarlinLHtoK PROTO((BLAST_ScoreFreqPtr sfp, Nlm_FloatHi lambda, Nlm_FloatHi H));
540 
541 Nlm_FloatHi BlastKarlinLambdaNR PROTO((BLAST_ScoreFreqPtr sfp));
542 
543 BLAST_Score LIBCALL BlastKarlinEtoS PROTO((Nlm_FloatHi  E, BLAST_KarlinBlkPtr kbp, Nlm_FloatHi  qlen, Nlm_FloatHi  dblen));
544 
545 BLAST_Score LIBCALL BlastKarlinEtoS_simple PROTO((Nlm_FloatHi  E, BLAST_KarlinBlkPtr kbp, Nlm_FloatHi searchsp));
546 
547 Nlm_FloatHi LIBCALL BlastKarlinPtoE PROTO((Nlm_FloatHi p));
548 
549 Nlm_FloatHi LIBCALL BlastKarlinEtoP PROTO((Nlm_FloatHi x));
550 
551 Nlm_FloatHi LIBCALL BlastKarlinStoP PROTO((BLAST_Score S, BLAST_KarlinBlkPtr kbp, Nlm_FloatHi  qlen, Nlm_FloatHi  dblen));
552 
553 Nlm_FloatHi LIBCALL BlastKarlinStoP_simple PROTO((BLAST_Score S, BLAST_KarlinBlkPtr kbp, Nlm_FloatHi  searchsp));
554 
555 Nlm_FloatHi LIBCALL BlastKarlinStoE PROTO((BLAST_Score S, BLAST_KarlinBlkPtr kbp, Nlm_FloatHi  qlen, Nlm_FloatHi  dblen));
556 
557 Nlm_FloatHi LIBCALL BlastKarlinStoE_simple PROTO((BLAST_Score S, BLAST_KarlinBlkPtr kbp, Nlm_FloatHi  searchsp));
558 
559 Nlm_FloatHi LIBCALL BlastGapDecayDivisor PROTO((Nlm_FloatHi decayrate, unsigned nsegs ));
560 
561 Int2 LIBCALL BlastCutoffs PROTO((BLAST_ScorePtr S, Nlm_FloatHi PNTR E, BLAST_KarlinBlkPtr kbp, Nlm_FloatHi search_sp, Nlm_Boolean dodecay,
562   Nlm_FloatHi gap_decay_rate));
563 
564 Nlm_FloatHi LIBCALL BlastKarlinStoLen PROTO((BLAST_KarlinBlkPtr kbp, BLAST_Score S));
565 
566 /* SumP function. Called by BlastSmallGapSumE and BlastLargeGapSumE. */
567 Nlm_FloatHi LIBCALL BlastSumP PROTO((Int4 r, Nlm_FloatHi s));
568 
569 /* Functions to calculate SumE (for large and small gaps). */
570 Nlm_FloatHi LIBCALL BlastSmallGapSumE PROTO((Int4 start_points, Int2 num,  Nlm_FloatHi xsum, Int4 query_length, Int4 subject_length, Int8 dblen_eff, Nlm_FloatHi weight_divisor));
571 
572 Nlm_FloatHi LIBCALL BlastUnevenGapSumE PROTO((Int4 query_start_points, Int4 subject_start_points, Int2 num, Nlm_FloatHi xsum, Int4 query_length, Int4 subject_length, Int8 dblen_eff, Nlm_FloatHi weight_divisor));
573 
574 Nlm_FloatHi LIBCALL BlastLargeGapSumE PROTO((Int2 num,  Nlm_FloatHi xsum, Int4 query_length, Int4 subject_length, Int8 dblen_eff, Nlm_FloatHi weight_divisor));
575 
576 /* Used to produce random sequences. */
577 CharPtr  LIBCALL BlastRepresentativeResidues PROTO((Int2 length));
578 
579 Int2 LIBCALL BlastResFreqNormalize PROTO((BLAST_ScoreBlkPtr sbp, BLAST_ResFreqPtr rfp, Nlm_FloatHi norm));
580 
581 BLAST_ResFreqPtr LIBCALL BlastResFreqNew PROTO((BLAST_ScoreBlkPtr sbp));
582 void LIBCALL BlastResFreqFree PROTO((BLAST_ResFreqPtr rfp));
583 
584 BLAST_ResFreqPtr LIBCALL BlastResFreqDestruct PROTO((BLAST_ResFreqPtr rfp));
585 
586 Int2 LIBCALL BlastResFreqString PROTO((BLAST_ScoreBlkPtr sbp, BLAST_ResFreqPtr rfp, CharPtr string, Int4 length));
587 
588 Int2 LIBCALL BlastResFreqStdComp PROTO((BLAST_ScoreBlkPtr sbp, BLAST_ResFreqPtr rfp));
589 
590 Int2 LIBCALL BlastResFreqResComp PROTO((BLAST_ScoreBlkPtr sbp, BLAST_ResFreqPtr rfp, BLAST_ResCompPtr rcp));
591 
592 Int2 LIBCALL BlastResFreqClr PROTO((BLAST_ScoreBlkPtr sbp, BLAST_ResFreqPtr rfp));
593 
594 BLAST_ScoreFreqPtr BlastScoreFreqNew PROTO((BLAST_Score score_min, BLAST_Score score_max));
595 
596 BLAST_ScoreFreqPtr BlastScoreFreqDestruct PROTO((BLAST_ScoreFreqPtr sfp));
597 
598 BLAST_MatrixPtr LIBCALL BLAST_MatrixDestruct PROTO((BLAST_MatrixPtr blast_matrix));
599 
600 BLAST_MatrixPtr LIBCALL BLAST_MatrixFill PROTO((BLAST_ScoreBlkPtr sbp, Boolean positionBased));
601 
602 BLAST_MatrixPtr LIBCALL BLAST_MatrixFetch PROTO((CharPtr matrix_name));
603 
604 
605 Int2 LIBCALL BlastGetStdAlphabet PROTO((Uint1 alphabet_code, Uint1Ptr residues, Int4 residues_size));
606 /*
607 Functions used to convert between Stephen's pseudo scores
608 and E or p-values.
609 */
610 Int2 ConvertPtoPseudoS PROTO((Nlm_FloatHi p, Nlm_FloatHi n));
611 Int2 ConvertEtoPseudoS PROTO((Nlm_FloatHi E, Nlm_FloatHi searchsp));
612 Nlm_FloatHi ConvertPseudoStoE PROTO((Int2 s, Nlm_FloatHi n));
613 
614 ScorePtr MakeBlastScore PROTO((ScorePtr PNTR old, CharPtr scoretype, Nlm_FloatHi prob, Int4 score));
615 
616 
617 /*
618 Obtains arrays of the allowed opening and extension penalties for gapped BLAST for
619 the given matrix.  Also obtains arrays of Lambda, K, and H.  The pref_flags field is
620 used for display purposes, with the defines: BLAST_MATRIX_NOMINAL, BLAST_MATRIX_PREFERRED, and
621 BLAST_MATRIX_BEST.
622 
623 Any of these fields that
624 are not required should be set to NULL.  The Int2 return value is the length of the
625 arrays.
626 */
627 
628 Int2 LIBCALL BlastKarlinGetMatrixValues PROTO((CharPtr matrix, Int4Ptr PNTR open, Int4Ptr PNTR extension, FloatHiPtr PNTR lambda, FloatHiPtr PNTR K, FloatHiPtr PNTR H, Int4Ptr PNTR pref_flags));
629 
630 Int2 LIBCALL BlastKarlinGetMatrixValuesEx PROTO((CharPtr matrix, Int4Ptr PNTR open, Int4Ptr PNTR extension, Int4Ptr PNTR decline_align, FloatHiPtr PNTR lambda, FloatHiPtr PNTR K, FloatHiPtr PNTR H, Int4Ptr PNTR pref_flags));
631 
632 Int2 LIBCALL BlastKarlinGetMatrixValuesEx2 PROTO((CharPtr matrix, Int4Ptr PNTR open, Int4Ptr PNTR extension, Int4Ptr PNTR decline_align, FloatHiPtr PNTR lambda, FloatHiPtr PNTR K, FloatHiPtr PNTR H, FloatHiPtr PNTR alpha, FloatHiPtr PNTR beta, Int4Ptr PNTR pref_flags));
633 
634 void LIBCALL getAlphaBeta PROTO((CharPtr matrixName, Nlm_FloatHi *alpha,
635 Nlm_FloatHi *beta, Boolean gapped, Int4 gap_open, Int4 gap_extend));
636 
637 Int2 LIBCALL BlastKarlinGetDefaultMatrixValues PROTO((CharPtr matrix, Int4Ptr open, Int4Ptr extension, FloatHiPtr lambda, FloatHiPtr K, FloatHiPtr H));
638 
639 Int4Ptr PNTR LIBCALL BlastMatrixToTxMatrix PROTO((BLAST_MatrixPtr matrix));
640 Int4Ptr PNTR LIBCALL TxMatrixDestruct PROTO((Int4Ptr PNTR txmatrix));
641 
642 Int2 BlastKarlinGetNuclAlphaBeta PROTO((Int4 reward, Int4 penalty, Int4 gap_open,
643                             Int4 gap_extend, BLAST_KarlinBlkPtr kbp,
644                             Boolean gapped_calculation,
645                             double *alpha, double *beta));
646 
647 Int2 BlastKarlinBlkNuclGappedCalc PROTO((BLAST_KarlinBlk* kbp, Int4 gap_open,
648                               Int4 gap_extend, Int4 reward, Int4 penalty,
649                               BLAST_KarlinBlk* kbp_ungap,
650                               Boolean* round_down,
651                               ValNodePtr* error_return));
652 
653 Int4
654 BlastComputeLengthAdjustment(Nlm_FloatHi K,
655                                   Nlm_FloatHi logK,
656                                   Nlm_FloatHi alpha_d_lambda,
657                                   Nlm_FloatHi beta,
658                                   Int4 query_length,
659                                   Int8 db_length,
660                                   Int4 db_num_seqs,
661                                   Int4 * length_adjustment);
662 
663 #ifdef __cplusplus
664 }
665 #endif
666 #endif /* !__BLASTKAR__ */
667