1 /* $Id: blast_posit.h 103491 2007-05-04 17:18:18Z kazimird $ 2 * =========================================================================== 3 * 4 * PUBLIC DOMAIN NOTICE 5 * National Center for Biotechnology Information 6 * 7 * This software/database is a "United States Government Work" under the 8 * terms of the United States Copyright Act. It was written as part of 9 * the author's official duties as a United States Government employee and 10 * thus cannot be copyrighted. This software/database is freely available 11 * to the public for use. The National Library of Medicine and the U.S. 12 * Government have not placed any restriction on its use or reproduction. 13 * 14 * Although all reasonable efforts have been taken to ensure the accuracy 15 * and reliability of the software and data, the NLM and the U.S. 16 * Government do not and cannot warrant the performance or results that 17 * may be obtained by using this software or data. The NLM and the U.S. 18 * Government disclaim all warranties, express or implied, including 19 * warranties of performance, merchantability or fitness for any particular 20 * purpose. 21 * 22 * Please cite the author in any work or product based on this material. 23 * 24 * =========================================================================== 25 * 26 * Author: Alejandro Schaffer (ported by Christiam Camacho) 27 * 28 */ 29 30 /** @file blast_posit.h 31 * Port of posit.h structures and impalaScaling for implementing composition 32 * based statistics for PSI-BLAST. 33 */ 34 35 #ifndef ALGO_BLAST_CORE___BLAST_POSIT__H 36 #define ALGO_BLAST_CORE___BLAST_POSIT__H 37 38 #include <algo/blast/core/ncbi_std.h> 39 #include <algo/blast/core/blast_stat.h> 40 #include "matrix_freq_ratios.h" 41 42 #ifdef __cplusplus 43 extern "C" { 44 #endif 45 46 /** number of real aminoacids (i.e.: does not include U, X, B, etc) */ 47 #define PRO_TRUE_ALPHABET_SIZE 20 48 /** range of scores in a matrix */ 49 #define kScoreMatrixScoreRange 10000 50 51 /** positions of true characters in protein alphabet*/ 52 extern const Int4 trueCharPositions[PRO_TRUE_ALPHABET_SIZE]; 53 54 /** Structure used to pass data into the scaling routines. All fields marked as 55 * alias are not owned by this structure. */ 56 typedef struct Kappa_posSearchItems { 57 /** PSSM */ 58 int** posMatrix; 59 /** Scaled PSSM [alias] */ 60 int** posPrivateMatrix; 61 /** PSSM's frequency ratios [alias] */ 62 double** posFreqs; 63 /** Frequecy ratios for underlying scoring matrix */ 64 SFreqRatios* stdFreqRatios; 65 /** Length of the query sequence, specifies the number of columns in the 66 * matrices in this structure */ 67 unsigned int queryLength; 68 } Kappa_posSearchItems; 69 70 /** Structure used to pass data into the scaling routines. All fields marked as 71 * aliases refer to fields in the BlastScoreBlk structure and are NOT owned by 72 * this structure */ 73 typedef struct Kappa_compactSearchItems { 74 /** Query sequence data in ncbistdaa format without sentinel bytes [alias]*/ 75 Uint1* query; 76 /** Length of the sequence above */ 77 int qlength; 78 /** Size of the alphabet @sa BLASTAA_SIZE */ 79 int alphabetSize; 80 /** Standard substitution scoring matrix [alias] */ 81 int** matrix; 82 /** Ungapped Karlin-Altschul parameters [alias] */ 83 Blast_KarlinBlk** kbp_std; 84 /** Ungapped PSI-BLAST Karlin-Altschul parameters [alias] */ 85 Blast_KarlinBlk** kbp_psi; 86 /** Gapped Karlin-Altschul parameters [alias] */ 87 Blast_KarlinBlk** kbp_gap_std; 88 /** Gapped PSI-BLAST Karlin-Altschul parameters [alias] */ 89 Blast_KarlinBlk** kbp_gap_psi; 90 /** Lambda calculated using standard residue compositions for the query and 91 * database sequences */ 92 double lambda_ideal; 93 /** K calculated using standard residue compositions for the query and 94 * database sequences */ 95 double K_ideal; 96 /** Array of standard residue probabilities, as those returned by 97 * BLAST_GetStandardAaProbabilities */ 98 double* standardProb; 99 100 } Kappa_compactSearchItems; 101 102 /** Allocates a new Kappa_posSearchItems structure 103 * @param queryLength length of the query sequence [in] 104 * @param matrix_name name of the underlying matrix name to use [in] 105 * @param posPrivateMatrix scaled pssm, allocated with dimensions queryLength 106 * by BLASTAA_SIZE. This is owned by the caller [in|out] 107 * @param posFreqs PSSM's frequency ratios, allocated with dimensions 108 * queryLength by BLASTAA_SIZE. This is owned by the caller [in|out] 109 * @return newly allocated structure or NULL if out of memory 110 */ 111 Kappa_posSearchItems* 112 Kappa_posSearchItemsNew(unsigned int queryLength, 113 const char* matrix_name, 114 int** posPrivateMatrix, 115 double** posFreqs); 116 117 /** Deallocates the Kappa_posSearchItems structure. 118 * @param posSearchItems data structure to deallocate [in] 119 * @return NULL 120 */ 121 Kappa_posSearchItems* 122 Kappa_posSearchItemsFree(Kappa_posSearchItems* posSearchItems); 123 124 /** Creates a new Kappa_compactSearchItems structure 125 * @param query query sequence data in ncbistdaa format without sentinel 126 * bytes [in] 127 * @param queryLength length of the sequence above [in] 128 * @param sbp BLAST scoring block structure [in] 129 * @return newly allocated structure or NULL if out of memory 130 */ 131 Kappa_compactSearchItems* 132 Kappa_compactSearchItemsNew(const Uint1* query, unsigned int queryLength, 133 BlastScoreBlk* sbp); 134 135 /** Deallocates the Kappa_compactSearchItems structure. 136 * @param compactSearchItems data structure to deallocate [in] 137 * @return NULL 138 */ 139 Kappa_compactSearchItems* 140 Kappa_compactSearchItemsFree(Kappa_compactSearchItems* compactSearchItems); 141 142 /** Copied from posit2.c 143 * @return 0 on success, 1 on failure 144 */ 145 int Kappa_impalaScaling(Kappa_posSearchItems* posSearch, 146 Kappa_compactSearchItems* compactSearch, 147 double scalingFactor, 148 Boolean doBinarySearch, 149 BlastScoreBlk* sbp); 150 151 #ifdef __cplusplus 152 } 153 #endif 154 155 #endif /* !ALGO_BLAST_CORE__BLAST_POSIT__H */ 156