1 /* ===========================================================================
2 *
3 *                            PUBLIC DOMAIN NOTICE
4 *               National Center for Biotechnology Information
5 *
6 *  This software/database is a "United States Government Work" under the
7 *  terms of the United States Copyright Act.  It was written as part of
8 *  the author's official duties as a United States Government employee and
9 *  thus cannot be copyrighted.  This software/database is freely available
10 *  to the public for use. The National Library of Medicine and the U.S.
11 *  Government have not placed any restriction on its use or reproduction.
12 *
13 *  Although all reasonable efforts have been taken to ensure the accuracy
14 *  and reliability of the software and data, the NLM and the U.S.
15 *  Government do not and cannot warrant the performance or results that
16 *  may be obtained by using this software or data. The NLM and the U.S.
17 *  Government disclaim all warranties, express or implied, including
18 *  warranties of performance, merchantability or fitness for any particular
19 *  purpose.
20 *
21 *  Please cite the author in any work or product based on this material.
22 *
23 * ===========================================================================*/
24 /*****************************************************************************
25 
26 File name: blastconcatdef.h
27 
28 Author: Karolina Maciag, Aleksandr Morgulis
29 
30 Contents: type definitions and function prototypes for query
31           multiplexing code.
32 
33 ******************************************************************************/
34 /* $Revision: 1.5 $
35 *  $Log: blastconcatdef.h,v $
36 *  Revision 1.5  2005/09/26 15:02:58  morgulis
*  Fixing some memory leaks when using query concatenation in blastn and tblastn.
38 *
39 *  Revision 1.4  2005/01/10 18:52:29  coulouri
40 *  fixes from morgulis to allow concatenation of >255 queries in [t]blastn
41 *
42 *  Revision 1.3  2004/04/20 14:55:47  morgulis
43 *  1. Fixed query offsets in results when -B option is used.
44 *  2. Fixes for lower case masking handling with -B option.
45 *
46 *  Revision 1.2  2003/12/29 15:42:46  coulouri
47 *  tblastn query concatenation fixes from morgulis
48 *
49 *  Revision 1.1  2003/03/24 20:47:28  madden
50 *  Utilities for concatenation of blastn/tblastn queries
51 *
52 * */
53 
54 #ifndef _BLASTCONCATDEF_
55 #define _BLASTCONCATDEF_
56 
57 #include <blastconcat.h>
58 
59 /* AM: MQ_ResultInfo is a structure containing the information about the
60        current number of results, best evalue and score for a particular
61        query. Those are used to ensure correct printing of results in
62        the case of multiple queries. One such structure is maintained per
63        input query. */
64 typedef struct mq_ResultInfo
65 {
66   Uint4 NumResults;
67   BLASTResultHitlistPtr PNTR results;
68 } MQ_ResultInfo, PNTR MQ_ResultInfoPtr;
69 
70 /* AM: The structure used to convey subject id and evalue information to
71        DivideSeqAligns() when using query multiplexing. */
72 typedef struct _MQ_ResultInfo
73 {
74   Int4 subject_id;
75   Nlm_FloatHi evalue;
76 } MQ_DivideResultsInfo, PNTR MQ_DivideResultsInfoPtr;
77 
78 /* AM: This structure holds the pointer to the sarray of seqalign lists
79        obtained by distributing seqaligns between queries when query
80        multiplexing is used. */
81 typedef struct sapArrayData
82 {
83   SeqAlignPtr PNTR sap_array;
84 } SapArrayData, PNTR SapArrayDataPtr;
85 
86 /*--KM the search->MultQueries structure with information about
87   the individual queries when the -B option is used for query
88   concatenation. Needed for the search block*/
89 typedef struct queries {
90     Uint4 NumQueries;
91     Int8 TotalLength;    /* AM: Total length of the concatenated query. */
92     BspArray FakeBsps;   /* contain SeqIdPtr's */
93     SeqLocPtr PNTR LCaseMasks; /* contain lower case masks in queries. */
94     IntArray QueryStarts;   /* starts/ends: element for each query */
95     IntArray QueryEnds;
96     IntArray WhichQuery;    /* "Which" arrays: for each letter in concat seq */
97     IntArray WhichPos;
98 
99     IntArray EffLengths;  /* AM: Effective lengths of queries */
100     IntArray Adjustments; /* AM: Query length adjustments */
101 
102     /* AM: Array of effective search spaces that should be used in e-value
103            calculation instead of the search space size derived from the length
104 	   of the concatenated sequence. */
105     FloatArray SearchSpEff;
106 
107     /* AM: Array of effective database lengths. */
108     Int8Array DbLenEff;
109 
110     /* AM: The following are needed for cutoff correction. */
111     Int4 MinLen;
112     Int4 MinLenEff;
113     Int8 MinDbLenEff;
114     Nlm_FloatHi MinSearchSpEff;
115     Nlm_FloatHi LambdaMin, LambdaMax, LogKMin, LogKMax;
116 
117     /* AM: The following are for storing hitlists relevant to a particular query. */
118     Uint4 current_query; /* AM: Query currently being processed. */
119     Boolean use_mq; /* AM: Tells some functions whether to use query multiplexing. */
120     BLAST_HitListPtr PNTR HitListArray; /* Array of pointers to hitlists per query. */
121 
122     MQ_ResultInfoPtr result_info; /* Information about results found for each query. */
123     Uint4 max_results_per_query;  /* Max number of results to keep for each query. */
124     Boolean delete_current_hitlist; /* true, if search->current_hitlist would have
125                                        been deleted in BlastSaveCurrentHitlist(). */
126 
127     BLAST_Score PNTR dropoff_2nd_pass_array; /* Individual values of dropoff_2nd_pass
128                                                 parameter per query. */
129     Nlm_FloatHi PNTR lambda_array; /* Infividual values of kbp->Lambda per query. */
130 
131     SapArrayDataPtr sap_array_data; /* Pointer to the final array of seqalign lists. */
132 
133     /* The seqaligns are not here b/c they are not in the original
134        search structure either; they are created and used as they are
135        needed in Main.  */
136 } Queries, PNTR QueriesPtr;
137 
138 
139 typedef struct _PrimaryNode
140 {
141   SeqAlignPtr sap;
142   Int4 subject_id;
143   Nlm_FloatHi evalue;
144   struct _PrimaryNode PNTR next;
145 } PrimaryNode, PNTR PrimaryNodePtr, PNTR PNTR PrimaryNodePtrArray;
146 
147 /* ----Prototypes----- */
148 
149 SeqAlignPtrArray LIBCALL DivideSeqAligns PROTO(( BLAST_OptionsBlkPtr options, SeqAlignPtr sap,
150                                                  QueriesPtr mult_queries, MQ_DivideResultsInfoPtr subjects ));
151 BioseqPtr LIBCALL BlastMakeFakeBspConcat PROTO((BspArray bsp_arr, Uint4 num_bsps, Boolean is_na,
152                                                 Uint4 num_spacers)); /* AM: Added num_spacers parameter */
153 QueriesPtr LIBCALL BlastMakeMultQueries PROTO((BspArray fbsp_arr, Uint4 num_queries, Boolean is_na, Uint4 num_spacers, SeqLocPtr PNTR lcase_mask_arr ));
154 QueriesPtr LIBCALL BlastDuplicateMultQueries PROTO(( QueriesPtr source ));
155 Uint4 GetQueryNum( QueriesPtr mult_queries, Int4 offset, Int4 end, Int2 frame );
156 Uint4 LIBCALL GetNumSpacers PROTO(( BLAST_OptionsBlkPtr options,
157                                     Boolean believe_query,
158 				    BspArray fake_bsp_arr ));
159 void LIBCALL InitHitLists PROTO(( BlastSearchBlkPtr search ));
160 Int4 LIBCALL ResultIndex1 PROTO(( BLASTResultHitlistPtr ptr,
161                                   BLASTResultHitlistPtr PNTR results,
162 				  Int4 num_elements ));
163 Int4 LIBCALL ResultIndex PROTO(( Nlm_FloatHi target_e, Int4 target_score, Int4 subject_id,
164                                  BLASTResultHitlistPtr PNTR results,
165 				 Int4 num_elements ));
166 void LIBCALL MQ_UpdateResultLists PROTO(( QueriesPtr mult_queries ));
167 SeqLocPtr LIBCALL ConcatSeqLoc PROTO(( QueriesPtr mult_queries, SeqLocPtr loc, SeqIdPtr id, Uint4 qnum ));
168 QueriesPtr LIBCALL BlastMultQueriesDestruct PROTO(( QueriesPtr queries ));
169 
170 #endif
171 
172