1 /*  $Id: blast_posit.h 103491 2007-05-04 17:18:18Z kazimird $
2  * ===========================================================================
3  *
4  *                            PUBLIC DOMAIN NOTICE
5  *               National Center for Biotechnology Information
6  *
7  *  This software/database is a "United States Government Work" under the
8  *  terms of the United States Copyright Act.  It was written as part of
9  *  the author's official duties as a United States Government employee and
10  *  thus cannot be copyrighted.  This software/database is freely available
11  *  to the public for use. The National Library of Medicine and the U.S.
12  *  Government have not placed any restriction on its use or reproduction.
13  *
14  *  Although all reasonable efforts have been taken to ensure the accuracy
15  *  and reliability of the software and data, the NLM and the U.S.
16  *  Government do not and cannot warrant the performance or results that
17  *  may be obtained by using this software or data. The NLM and the U.S.
18  *  Government disclaim all warranties, express or implied, including
19  *  warranties of performance, merchantability or fitness for any particular
20  *  purpose.
21  *
22  *  Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author:  Alejandro Schaffer (ported by Christiam Camacho)
27  *
28  */
29 
30 /** @file blast_posit.h
31  * Port of posit.h structures and impalaScaling for implementing
32  * composition-based statistics for PSI-BLAST.
33  */
34 
35 #ifndef ALGO_BLAST_CORE___BLAST_POSIT__H
36 #define ALGO_BLAST_CORE___BLAST_POSIT__H
37 
38 #include <algo/blast/core/ncbi_std.h>
39 #include <algo/blast/core/blast_stat.h>
40 #include "matrix_freq_ratios.h"
41 
42 #ifdef __cplusplus
43 extern "C" {
44 #endif
45 
46 /** Number of real amino acids (i.e. does not include U, X, B, etc.) */
47 #define PRO_TRUE_ALPHABET_SIZE 20
48 /** Range of scores in a matrix.
 * NOTE(review): whether this is a span (max - min) or an absolute bound is
 * not visible in this header -- confirm against the scaling code. */
49 #define kScoreMatrixScoreRange 10000
50 
51 /** Positions of the true characters in the protein alphabet; the array holds
 * PRO_TRUE_ALPHABET_SIZE entries. Only declared here; the definition lives
 * in another translation unit. */
52 extern const Int4 trueCharPositions[PRO_TRUE_ALPHABET_SIZE];
53 
54 /** Structure used to pass PSSM data into the scaling routines. All fields
55  * marked as [alias] are not owned by this structure.
 * NOTE(review): posMatrix and stdFreqRatios carry no [alias] marker, so they
 * are presumably owned by this structure -- confirm against
 * Kappa_posSearchItemsFree. */
56 typedef struct Kappa_posSearchItems {
57     /** PSSM */
58     int**               posMatrix;
59     /** Scaled PSSM [alias] */
60     int**               posPrivateMatrix;
61     /** PSSM's frequency ratios [alias] */
62     double**            posFreqs;
63     /** Frequency ratios for the underlying scoring matrix */
64     SFreqRatios*        stdFreqRatios;
65     /** Length of the query sequence; specifies the number of columns in the
66      * matrices in this structure */
67     unsigned int        queryLength;
68 } Kappa_posSearchItems;
69 
70 /** Structure used to pass data into the scaling routines. All fields marked as
71  * [alias] refer to fields in the BlastScoreBlk structure and are NOT owned by
72  * this structure.
 * NOTE(review): query is also marked [alias], yet Kappa_compactSearchItemsNew
 * receives it as a separate argument rather than through the BlastScoreBlk --
 * presumably it aliases the caller's buffer; confirm.
 * NOTE(review): Kappa_compactSearchItemsNew takes `const Uint1* query` and
 * `unsigned int queryLength`, while the fields below are a non-const
 * `Uint1*` and a signed `int` -- verify the conversions in the
 * implementation. */
73 typedef struct Kappa_compactSearchItems {
74     /** Query sequence data in ncbistdaa format without sentinel bytes [alias] */
75     Uint1*              query;
76     /** Length of the sequence above */
77     int                 qlength;
78     /** Size of the alphabet @sa BLASTAA_SIZE */
79     int                 alphabetSize;
80     /** Standard substitution scoring matrix [alias] */
81     int**               matrix;
82     /** Ungapped Karlin-Altschul parameters [alias] */
83     Blast_KarlinBlk**   kbp_std;
84     /** Ungapped PSI-BLAST Karlin-Altschul parameters [alias] */
85     Blast_KarlinBlk**   kbp_psi;
86     /** Gapped Karlin-Altschul parameters [alias] */
87     Blast_KarlinBlk**   kbp_gap_std;
88     /** Gapped PSI-BLAST Karlin-Altschul parameters [alias] */
89     Blast_KarlinBlk**   kbp_gap_psi;
90     /** Lambda calculated using standard residue compositions for the query and
91      * database sequences */
92     double              lambda_ideal;
93     /** K calculated using standard residue compositions for the query and
94      * database sequences */
95     double              K_ideal;
96     /** Array of standard residue probabilities, as returned by
97      * BLAST_GetStandardAaProbabilities */
98     double*             standardProb;
99 
100 } Kappa_compactSearchItems;
101 
102 /** Allocates a new Kappa_posSearchItems structure. The matching deallocation
 * routine declared in this header is Kappa_posSearchItemsFree.
103  * @param queryLength length of the query sequence [in]
104  * @param matrix_name name of the underlying scoring matrix to use [in]
105  * @param posPrivateMatrix scaled PSSM, allocated with dimensions queryLength
106  * by BLASTAA_SIZE. This is owned by the caller [in|out]
107  * @param posFreqs PSSM's frequency ratios, allocated with dimensions
108  * queryLength by BLASTAA_SIZE. This is owned by the caller [in|out]
109  * @return newly allocated structure or NULL if out of memory
110  */
111 Kappa_posSearchItems*
112 Kappa_posSearchItemsNew(unsigned int queryLength,
113                         const char* matrix_name,
114                         int** posPrivateMatrix,
115                         double** posFreqs);
116 
117 /** Deallocates the Kappa_posSearchItems structure.
 * NOTE(review): the fields documented as [alias] in Kappa_posSearchItems are
 * described as not owned by the structure, so presumably they are left for
 * the caller to release -- confirm in the implementation.
118  * @param posSearchItems data structure to deallocate [in]
119  * @return NULL
120  */
121 Kappa_posSearchItems*
122 Kappa_posSearchItemsFree(Kappa_posSearchItems* posSearchItems);
123 
124 /** Creates a new Kappa_compactSearchItems structure. The matching deallocation
 * routine declared in this header is Kappa_compactSearchItemsFree.
 * NOTE(review): Kappa_compactSearchItems documents its query, matrix and
 * kbp_* fields as aliases, so query and sbp presumably must outlive the
 * returned structure -- confirm in the implementation.
125  * @param query query sequence data in ncbistdaa format without sentinel
126  * bytes [in]
127  * @param queryLength length of the sequence above [in]
128  * @param sbp BLAST scoring block structure [in]
129  * @return newly allocated structure or NULL if out of memory
130  */
131 Kappa_compactSearchItems*
132 Kappa_compactSearchItemsNew(const Uint1* query, unsigned int queryLength,
133                             BlastScoreBlk* sbp);
134 
135 /** Deallocates the Kappa_compactSearchItems structure.
 * NOTE(review): the fields documented as [alias] in Kappa_compactSearchItems
 * are described as not owned by the structure, so presumably they are not
 * released here -- confirm in the implementation.
136  * @param compactSearchItems data structure to deallocate [in]
137  * @return NULL
138  */
139 Kappa_compactSearchItems*
140 Kappa_compactSearchItemsFree(Kappa_compactSearchItems* compactSearchItems);
141 
142 /** Scaling routine that operates on the data carried by the two Kappa_*
 * structures. Copied from posit2.c.
 * @param posSearch PSSM-related data, see Kappa_posSearchItems
 * @param compactSearch query, scoring matrix and Karlin-Altschul data, see
 * Kappa_compactSearchItems
 * @param scalingFactor NOTE(review): presumably the factor by which the
 * scores are scaled -- confirm in the implementation
 * @param doBinarySearch NOTE(review): meaning is not visible in this header;
 * confirm in the implementation
 * @param sbp BLAST scoring block structure
143  * @return 0 on success, 1 on failure
144  */
145 int Kappa_impalaScaling(Kappa_posSearchItems* posSearch,
146                         Kappa_compactSearchItems* compactSearch,
147                         double scalingFactor,
148                         Boolean doBinarySearch,
149                         BlastScoreBlk* sbp);
150 
151 #ifdef __cplusplus
152 }
153 #endif
154 
155 #endif /* !ALGO_BLAST_CORE___BLAST_POSIT__H */
156