1 /* =========================================================================== 2 * 3 * PUBLIC DOMAIN NOTICE 4 * National Center for Biotechnology Information 5 * 6 * This software/database is a "United States Government Work" under the 7 * terms of the United States Copyright Act. It was written as part of 8 * the author's official duties as a United States Government employee and 9 * thus cannot be copyrighted. This software/database is freely available 10 * to the public for use. The National Library of Medicine and the U.S. 11 * Government have not placed any restriction on its use or reproduction. 12 * 13 * Although all reasonable efforts have been taken to ensure the accuracy 14 * and reliability of the software and data, the NLM and the U.S. 15 * Government do not and cannot warrant the performance or results that 16 * may be obtained by using this software or data. The NLM and the U.S. 17 * Government disclaim all warranties, express or implied, including 18 * warranties of performance, merchantability or fitness for any particular 19 * purpose. 20 * 21 * Please cite the author in any work or product based on this material. 22 * 23 * ===========================================================================*/ 24 /***************************************************************************** 25 26 File name: blastconcatdef.h 27 28 Author: Karolina Maciag, Aleksandr Morgulis 29 30 Contents: type definitions and function prototypes for query 31 multiplexing code. 32 33 ******************************************************************************/ 34 /* $Revision: 1.5 $ 35 * $Log: blastconcatdef.h,v $ 36 * Revision 1.5 2005/09/26 15:02:58 morgulis 37 * Fixing some memort leaks when using query concatenation in blastn and tblastn. 38 * 39 * Revision 1.4 2005/01/10 18:52:29 coulouri 40 * fixes from morgulis to allow concatenation of >255 queries in [t]blastn 41 * 42 * Revision 1.3 2004/04/20 14:55:47 morgulis 43 * 1. Fixed query offsets in results when -B option is used. 44 * 2. Fixes for lower case masking handling with -B option. 45 * 46 * Revision 1.2 2003/12/29 15:42:46 coulouri 47 * tblastn query concatenation fixes from morgulis 48 * 49 * Revision 1.1 2003/03/24 20:47:28 madden 50 * Utilities for concatenation of blastn/tblastn queries 51 * 52 * */ 53 54 #ifndef _BLASTCONCATDEF_ 55 #define _BLASTCONCATDEF_ 56 57 #include <blastconcat.h> 58 59 /* AM: MQ_ResultInfo is a structure containing the information about the 60 current number of results, best evalue and score for a particular 61 query. Those are used to ensure correct printing of results in 62 the case of multiple queries. One such structure is maintained per 63 input query. */ 64 typedef struct mq_ResultInfo 65 { 66 Uint4 NumResults; 67 BLASTResultHitlistPtr PNTR results; 68 } MQ_ResultInfo, PNTR MQ_ResultInfoPtr; 69 70 /* AM: The structure used to convey subject id and evalue information to 71 DivideSeqAligns() when using query multiplexing. */ 72 typedef struct _MQ_ResultInfo 73 { 74 Int4 subject_id; 75 Nlm_FloatHi evalue; 76 } MQ_DivideResultsInfo, PNTR MQ_DivideResultsInfoPtr; 77 78 /* AM: This structure holds the pointer to the sarray of seqalign lists 79 obtained by distributing seqaligns between queries when query 80 multiplexing is used. */ 81 typedef struct sapArrayData 82 { 83 SeqAlignPtr PNTR sap_array; 84 } SapArrayData, PNTR SapArrayDataPtr; 85 86 /*--KM the search->MultQueries structure with information about 87 the individual queries when the -B option is used for query 88 concatenation. Needed for the search block*/ 89 typedef struct queries { 90 Uint4 NumQueries; 91 Int8 TotalLength; /* AM: Total length of the concatenated query. */ 92 BspArray FakeBsps; /* contain SeqIdPtr's */ 93 SeqLocPtr PNTR LCaseMasks; /* contain lower case masks in queries. */ 94 IntArray QueryStarts; /* starts/ends: element for each query */ 95 IntArray QueryEnds; 96 IntArray WhichQuery; /* "Which" arrays: for each letter in concat seq */ 97 IntArray WhichPos; 98 99 IntArray EffLengths; /* AM: Effective lengths of queries */ 100 IntArray Adjustments; /* AM: Query length adjustments */ 101 102 /* AM: Array of effective search spaces that should be used in e-value 103 calculation instead of the search space size derived from the length 104 of the concatenated sequence. */ 105 FloatArray SearchSpEff; 106 107 /* AM: Array of effective database lengths. */ 108 Int8Array DbLenEff; 109 110 /* AM: The following are needed for cutoff correction. */ 111 Int4 MinLen; 112 Int4 MinLenEff; 113 Int8 MinDbLenEff; 114 Nlm_FloatHi MinSearchSpEff; 115 Nlm_FloatHi LambdaMin, LambdaMax, LogKMin, LogKMax; 116 117 /* AM: The following are for storing hitlists relevant to a particular query. */ 118 Uint4 current_query; /* AM: Query currently being processed. */ 119 Boolean use_mq; /* AM: Tells some functions whether to use query multiplexing. */ 120 BLAST_HitListPtr PNTR HitListArray; /* Array of pointers to hitlists per query. */ 121 122 MQ_ResultInfoPtr result_info; /* Information about results found for each query. */ 123 Uint4 max_results_per_query; /* Max number of results to keep for each query. */ 124 Boolean delete_current_hitlist; /* true, if search->current_hitlist would have 125 been deleted in BlastSaveCurrentHitlist(). */ 126 127 BLAST_Score PNTR dropoff_2nd_pass_array; /* Individual values of dropoff_2nd_pass 128 parameter per query. */ 129 Nlm_FloatHi PNTR lambda_array; /* Infividual values of kbp->Lambda per query. */ 130 131 SapArrayDataPtr sap_array_data; /* Pointer to the final array of seqalign lists. */ 132 133 /* The seqaligns are not here b/c they are not in the original 134 search structure either; they are created and used as they are 135 needed in Main. */ 136 } Queries, PNTR QueriesPtr; 137 138 139 typedef struct _PrimaryNode 140 { 141 SeqAlignPtr sap; 142 Int4 subject_id; 143 Nlm_FloatHi evalue; 144 struct _PrimaryNode PNTR next; 145 } PrimaryNode, PNTR PrimaryNodePtr, PNTR PNTR PrimaryNodePtrArray; 146 147 /* ----Prototypes----- */ 148 149 SeqAlignPtrArray LIBCALL DivideSeqAligns PROTO(( BLAST_OptionsBlkPtr options, SeqAlignPtr sap, 150 QueriesPtr mult_queries, MQ_DivideResultsInfoPtr subjects )); 151 BioseqPtr LIBCALL BlastMakeFakeBspConcat PROTO((BspArray bsp_arr, Uint4 num_bsps, Boolean is_na, 152 Uint4 num_spacers)); /* AM: Added num_spacers parameter */ 153 QueriesPtr LIBCALL BlastMakeMultQueries PROTO((BspArray fbsp_arr, Uint4 num_queries, Boolean is_na, Uint4 num_spacers, SeqLocPtr PNTR lcase_mask_arr )); 154 QueriesPtr LIBCALL BlastDuplicateMultQueries PROTO(( QueriesPtr source )); 155 Uint4 GetQueryNum( QueriesPtr mult_queries, Int4 offset, Int4 end, Int2 frame ); 156 Uint4 LIBCALL GetNumSpacers PROTO(( BLAST_OptionsBlkPtr options, 157 Boolean believe_query, 158 BspArray fake_bsp_arr )); 159 void LIBCALL InitHitLists PROTO(( BlastSearchBlkPtr search )); 160 Int4 LIBCALL ResultIndex1 PROTO(( BLASTResultHitlistPtr ptr, 161 BLASTResultHitlistPtr PNTR results, 162 Int4 num_elements )); 163 Int4 LIBCALL ResultIndex PROTO(( Nlm_FloatHi target_e, Int4 target_score, Int4 subject_id, 164 BLASTResultHitlistPtr PNTR results, 165 Int4 num_elements )); 166 void LIBCALL MQ_UpdateResultLists PROTO(( QueriesPtr mult_queries )); 167 SeqLocPtr LIBCALL ConcatSeqLoc PROTO(( QueriesPtr mult_queries, SeqLocPtr loc, SeqIdPtr id, Uint4 qnum )); 168 QueriesPtr LIBCALL BlastMultQueriesDestruct PROTO(( QueriesPtr queries )); 169 170 #endif 171 172