1 static char const rcsid[] = "$Id: blastconcat.c,v 1.14 2007/05/07 13:30:54 kans Exp $";
2
3 /* ===========================================================================
4 *
5 * PUBLIC DOMAIN NOTICE
6 * National Center for Biotechnology Information
7 *
8 * This software/database is a "United States Government Work" under the
9 * terms of the United States Copyright Act. It was written as part of
10 * the author's official duties as a United States Government employee and
11 * thus cannot be copyrighted. This software/database is freely available
12 * to the public for use. The National Library of Medicine and the U.S.
13 * Government have not placed any restriction on its use or reproduction.
14 *
15 * Although all reasonable efforts have been taken to ensure the accuracy
16 * and reliability of the software and data, the NLM and the U.S.
17 * Government do not and cannot warrant the performance or results that
18 * may be obtained by using this software or data. The NLM and the U.S.
19 * Government disclaim all warranties, express or implied, including
20 * warranties of performance, merchantability or fitness for any particular
21 * purpose.
22 *
23 * Please cite the author in any work or product based on this material.
24 *
25 * ===========================================================================*/
26 /*****************************************************************************
27
28 File name: blastconcat.c
29
30 Author: Karolina Maciag, Aleksandr Morgulis
31
32 Contents: implementation of functions needed for query multiplexing
33 functionality.
34
35 ******************************************************************************/
36 /* $Revision: 1.14 $
37 * $Log: blastconcat.c,v $
38 * Revision 1.14 2007/05/07 13:30:54 kans
39 * added casts for Seq-data.gap (SeqDataPtr, SeqGapPtr, ByteStorePtr)
40 *
41 * Revision 1.13 2007/03/13 20:39:35 madden
42 * - In GetNumSpacers, change the type the local variables
43 * dropoff_1st_pass_bits, dropoff_2nd_pass_bits, gap_x_dropoff_bits,
44 * and gap_x_dropoff_final_bits to Nlm_FloatHi as all these values are
45 * properly floating point values and are floating point values in the
46 * options structure.
47 * - In GetNumSpacers, don't case DROPOFF_NUMBER_OF_BITS to integer.
48 * [from Mike Gertz]
49 *
50 * Revision 1.12 2005/10/06 12:52:23 madden
51 * Changes to support correct gapped stats for blastn
52 *
53 * Revision 1.11 2005/09/26 15:02:58 morgulis
54 * Fixing some memort leaks when using query concatenation in blastn and tblastn.
55 *
56 * Revision 1.10 2005/01/10 18:52:29 coulouri
57 * fixes from morgulis to allow concatenation of >255 queries in [t]blastn
58 *
59 * Revision 1.9 2004/04/21 19:25:04 coulouri
60 * do not cast lvalues
61 *
62 * Revision 1.8 2004/04/21 13:54:23 camacho
63 * Remove unused variable
64 *
65 * Revision 1.7 2004/04/20 14:55:47 morgulis
66 * 1. Fixed query offsets in results when -B option is used.
67 * 2. Fixes for lower case masking handling with -B option.
68 *
69 * Revision 1.6 2004/02/26 15:52:29 papadopo
70 * Mike Gertz' modifications to unify handling of gapped Karlin blocks between protein and nucleotide searches
71 *
72 * Revision 1.5 2003/12/29 15:42:46 coulouri
73 * tblastn query concatenation fixes from morgulis
74 *
75 * Revision 1.4 2003/05/30 17:25:36 coulouri
76 * add rcsid
77 *
78 * Revision 1.3 2003/03/25 15:02:11 madden
79 * Code cleanup from A. Morgulis
80 *
81 * Revision 1.2 2003/03/24 21:16:03 madden
82 * Cast before calling StrCat
83 *
84 * Revision 1.1 2003/03/24 20:47:28 madden
85 * Utilities for concatenation of blastn/tblastn queries
86 *
87 * */
88
89 #include <ncbi.h>
90 #include <objalign.h>
91 #include <objseq.h>
92
93 #ifndef _BLASTCONCAT_
94 #include <blastconcat.h>
95 #endif
96
97 #include <blastpri.h>
98 #include <blastdef.h>
99 #include <blastconcatdef.h>
100
101 /* AM: The following function returns the minimal number of spacers necessary to
102 create the concatenated query.
103
104 Parameters:
105
106 options The option block created based on blast command line arguments.
107 believe_query Flag indicating whether to believe the original bsp;
108 fake_bsp_arr The array of queries to be concatenated
109 */
GetNumSpacers(BLAST_OptionsBlkPtr options,Boolean believe_query,BspArray fake_bsp_arr)110 Uint4 LIBCALL GetNumSpacers PROTO(( BLAST_OptionsBlkPtr options, Boolean believe_query,
111 BspArray fake_bsp_arr ))
112 {
113 Nlm_FloatHi dropoff_1st_pass_bits, /* Dropoff ("X") used for the first pass (bits) */
114 dropoff_2nd_pass_bits, /* Dropoff ("X") used for the second pass (bits ) */
115 gap_x_dropoff_bits, /* Dropoff ("X") used for gapped alignments (bits) */
116 gap_x_dropoff_final_bits; /* Dropoff ("X") used for final gapped alignment (bits) */
117 Int4 query_length, /* The length of the first query in fake_bsp_arr. */
118 query_loc_start, /* Index of the start of the query sequence */
119 index,
120 i;
121 BLAST_Score dropoff_1st_pass, /* Dropoff ("X") used for the first pass */
122 dropoff_2nd_pass, /* Dropoff ("X") used for the second pass */
123 gap_x_dropoff, /* Dropoff ("X") used for gapped alignments */
124 gap_x_dropoff_final, /* Dropoff ("X") used for final gapped alignment */
125 X_score = 0, /* The score of the ambiguous match */
126 max_dropoff; /* max of dropoff_1st_pass, dropoff_2nd_pass, gap_x_dropoff, gap_x_dropoff_final */
127 Nlm_FloatHi Lambda; /* Lambda parameter (used here for converting scores from bit scores) */
128 BLAST_KarlinBlkPtr kbp, /* Pointer to blast parameter block (contains Lambda)*/
129 kbp_gap;
130 SeqLocPtr query_slp, /* Sequence location corresponding to the whole fake_bsp_arr[0] */
131 private_slp; /* Sequence location constructed based on the query strand */
132 BioseqPtr fake_bsp, /* Constructed if the original query can not be trusted */
133 query_bsp; /* == fake_bsp_arr[0] */
134 Int2 first_context, /* First context of the sequence */
135 last_context, /* Last context of the sequence */
136 status;
137 Uint1 alphabet, /* Sequence alphabet identifier */
138 strand, /* Sequence strands (taken from options) */
139 residue; /* Temporary used as an iterator in the loop over the letters in the sequence */
140 BLAST_ScoreBlkPtr sbp; /* Score block constructed from the sequence */
141 SeqPortPtr spp = NULL; /* Sequence port for reading sequence content */
142 Uint1Ptr query_seq = NULL, /* Points to the sequence data */
143 query_seq_start = NULL; /* Points to the actual start of the sequence data */
144 BLAST_ScorePtr PNTR matrix; /* Blast parameter matrix (contained in sbp) */
145 ValNodePtr error_return = NULL;
146
147 query_bsp = fake_bsp_arr[0];
148
149 if( believe_query ) fake_bsp = query_bsp;
150 else fake_bsp = BlastMakeFakeBioseq( query_bsp, NULL );
151
152 query_slp = NULL;
153 ValNodeAddPointer( &query_slp, SEQLOC_WHOLE,
154 SeqIdDup( SeqIdFindBest( fake_bsp->id, SEQID_GI )));
155
156 BlastGetFirstAndLastContext( options->program_name, query_slp,
157 &first_context, &last_context,
158 options->strand_option );
159
160 query_length = SeqLocLen( query_slp );
161
162 if( StringCmp( options->program_name, "blastn" ) == 0 )
163 alphabet = BLASTNA_SEQ_CODE;
164 else alphabet = Seq_code_ncbistdaa;
165
166 sbp = BLAST_ScoreBlkNew( alphabet, last_context + 1 );
167
168 if( StringCmp( options->program_name, "blastn" ) != 0 )
169 {
170 sbp->read_in_matrix = TRUE;
171 BlastScoreSetAmbigRes( sbp, 'X' );
172 }
173 else
174 {
175 if( options->matrix != NULL && *(options->matrix) != NULLB )
176 sbp->read_in_matrix = TRUE;
177 else sbp->read_in_matrix = FALSE;
178
179 BlastScoreSetAmbigRes( sbp, 'N' );
180 }
181
182 sbp->penalty = options->penalty;
183 sbp->reward = options->reward;
184 sbp->query_length = query_length;
185
186 if( options->matrix != NULL )
187 status = BlastScoreBlkMatFill( sbp, options->matrix );
188 else status = BlastScoreBlkMatFill( sbp, "BLOSUM62" );
189
190 if( status != 0 )
191 {
192 ErrPostEx( SEV_WARNING, 0, 0,
193 "BlastScoreBlkMatFill returned non-zero status" );
194 }
195
196 query_loc_start = SeqLocStart( query_slp );
197 strand = SeqLocStrand( query_slp );
198
199 if( strand == Seq_strand_unknown
200 || strand == Seq_strand_plus
201 || strand == Seq_strand_both )
202 private_slp = SeqLocIntNew( query_loc_start, SeqLocStop( query_slp ),
203 Seq_strand_plus, SeqLocId( query_slp ) );
204 else
205 private_slp = SeqLocIntNew( query_loc_start, SeqLocStop( query_slp ),
206 Seq_strand_minus, SeqLocId( query_slp ) );
207
208 if( StringCmp( options->program_name, "tblastn" ) == 0 )
209 {
210 spp = SeqPortNewByLoc( private_slp, Seq_code_ncbistdaa );
211 SeqPortSet_do_virtual( spp, TRUE );
212 }
213 else
214 {
215 spp = SeqPortNewByLoc( private_slp, Seq_code_ncbi4na );
216 SeqPortSet_do_virtual( spp, TRUE );
217 }
218
219 if( spp )
220 {
221 query_seq_start = (Uint1Ptr)MemNew( 2*(query_length + 2)*sizeof( Char ) );
222 query_seq_start[0] = NULLB;
223 query_seq = query_seq_start + 1;
224 index = 0;
225
226 while( (residue = SeqPortGetResidue( spp )) != SEQPORT_EOF )
227 if( IS_residue( residue ) )
228 {
229 if( residue == 24 ) residue = 21;
230
231 query_seq[index] = residue;
232 ++index;
233 }
234
235 query_seq[index] = NULLB;
236 spp = SeqPortFree( spp );
237
238 if( StringCmp( options->program_name, "blastn" ) == 0 )
239 for( index = 0; index <= query_length + 1; ++index )
240 query_seq_start[index] = ncbi4na_to_blastna[query_seq_start[index]];
241 }
242
243
244 if( StringCmp( options->program_name, "blastn" ) == 0 )
245 {
246 BlastKarlinBlkNuclGappedCalc(sbp->kbp_gap_std[first_context], options->gap_open, options->gap_extend, options->reward, options->penalty, sbp->kbp_std[first_context], &(sbp->round_down), &error_return);
247
248 if( first_context == 0 )
249 BlastScoreBlkFill( sbp, (CharPtr)query_seq_start, query_length, 0 );
250
251 if( last_context == 1 )
252 BlastScoreBlkFill( sbp, (CharPtr)query_seq_start, query_length, 1 );
253 /* EMG: kbp and kbp_gap both appear to be unused in this routine */
254 sbp->kbp_gap = sbp->kbp_gap_std;
255 sbp->kbp = sbp->kbp_std;
256 kbp = sbp->kbp[first_context];
257 kbp_gap = sbp->kbp[first_context];
258 Lambda = kbp->Lambda;
259 }
260 else /* tblastn */
261 {
262 BlastScoreBlkFill( sbp, (CharPtr)query_seq_start, query_length, 0 );
263 sbp->kbp_gap_std[0] = BlastKarlinBlkCreate();
264 BlastKarlinBlkGappedCalcEx( sbp->kbp_gap_std[0], options->gap_open,
265 options->gap_extend, options->decline_align,
266 sbp->name, &error_return );
267 sbp->kbp_gap = sbp->kbp_gap_std;
268 sbp->kbp = sbp->kbp_std;
269 kbp_gap = sbp->kbp_gap[first_context];
270 Lambda = kbp_gap->Lambda;
271 }
272
273 matrix = sbp->matrix;
274
275 if( StringCmp( options->program_name, "blastn" ) == 0 )
276 X_score = abs(-matrix[0][sbp->ambiguous_res[0]]);
277 else /* tblastn */
278 {
279 X_score = INT4_MAX;
280
281 for( i = 1; i < sbp->mat_dim2; ++i )
282 if( matrix[i][sbp->ambiguous_res[0]]
283 && abs( matrix[i][sbp->ambiguous_res[0]] ) < X_score )
284 X_score = abs( matrix[i][sbp->ambiguous_res[0]] );
285 }
286
287 dropoff_1st_pass_bits = options->dropoff_1st_pass;
288 dropoff_2nd_pass_bits = options->dropoff_2nd_pass;
289 gap_x_dropoff_bits = options->gap_x_dropoff;
290 gap_x_dropoff_final_bits = options->gap_x_dropoff_final;
291
292 if( !dropoff_1st_pass_bits )
293 dropoff_1st_pass_bits = DROPOFF_NUMBER_OF_BITS;
294
295 if( !dropoff_2nd_pass_bits )
296 dropoff_2nd_pass_bits = DROPOFF_NUMBER_OF_BITS;
297
298 gap_x_dropoff = (BLAST_Score)(gap_x_dropoff_bits*NCBIMATH_LN2/Lambda);
299 gap_x_dropoff_final
300 = (BLAST_Score)(gap_x_dropoff_final_bits*NCBIMATH_LN2/Lambda);
301 gap_x_dropoff_final = MAX( gap_x_dropoff_final, gap_x_dropoff );
302 dropoff_1st_pass
303 = (BLAST_Score)ceil( (Nlm_FloatHi)dropoff_1st_pass_bits*NCBIMATH_LN2
304 /Lambda );
305 dropoff_2nd_pass
306 = (BLAST_Score)ceil( (Nlm_FloatHi)dropoff_2nd_pass_bits*NCBIMATH_LN2
307 /Lambda );
308
309 max_dropoff = MAX( gap_x_dropoff_final, gap_x_dropoff );
310
311 if( max_dropoff < dropoff_1st_pass ) max_dropoff = dropoff_1st_pass;
312
313 if( max_dropoff < dropoff_2nd_pass ) max_dropoff = dropoff_2nd_pass;
314
315 SeqLocFree( private_slp );
316 BLAST_ScoreBlkDestruct( sbp );
317 SeqLocFree( query_slp );
318 SeqMgrDeleteFromBioseqIndex( fake_bsp );
319 MemFree(query_seq_start);
320 return (Uint4)ceil( ((Nlm_FloatHi)max_dropoff*1.5)/X_score );
321 }
322
323 /* AM: This functions returns the number of the original query given an
324 offset and frame into the concatenated query. */
GetQueryNum(QueriesPtr mult_queries,Int4 offset,Int4 end,Int2 frame)325 Uint4 GetQueryNum( QueriesPtr mult_queries, Int4 offset, Int4 end, Int2 frame )
326 {
327 Int4 tmp_offset = 0;
328 Int4 tmp_end = 0;
329
330 if( frame == 1 )
331 if( offset > mult_queries->TotalLength )
332 {
333 tmp_offset = 2*mult_queries->TotalLength + 1 - offset;
334 tmp_end = 2*mult_queries->TotalLength + 1 - end;
335 }
336 else
337 {
338 tmp_offset = offset;
339 tmp_end = end;
340 }
341 else if( frame == -1 )
342 {
343 tmp_offset = mult_queries->TotalLength - offset;
344 tmp_end = mult_queries->TotalLength - end;
345 }
346 else if( frame == 0 )
347 {
348 tmp_offset = offset;
349 tmp_end = end;
350 }
351
352 while( tmp_offset != tmp_end
353 && (mult_queries->WhichQuery[tmp_offset] <= 0
354 || mult_queries->WhichQuery[tmp_offset]
355 > mult_queries->NumQueries) )
356 if( tmp_offset > tmp_end )
357 --tmp_offset;
358 else ++tmp_offset;
359
360 return mult_queries->WhichQuery[tmp_offset] - 1;
361 }
362
363 /* AM: Comparison routine for SeqAlignPtr used by DivideSeqAligns. */
CompareSeqAlignPtr(Nlm_VoidPtr afirst,Nlm_VoidPtr asecond)364 static int LIBCALL CompareSeqAlignPtr PROTO(( Nlm_VoidPtr afirst, Nlm_VoidPtr asecond ))
365 {
366 Nlm_FloatHi bit_score1, evalue1, bit_score2, evalue2;
367 Int4 number1, score1, number2, score2;
368 PrimaryNodePtr PNTR first_node = (PrimaryNodePtr PNTR) afirst;
369 PrimaryNodePtr PNTR second_node = (PrimaryNodePtr PNTR) asecond;
370 SeqAlignPtr first = (*first_node)->sap;
371 SeqAlignPtr second = (*second_node)->sap;
372 GetScoreAndEvalue( first, &score1, &bit_score1, &evalue1, &number1 );
373 GetScoreAndEvalue( second, &score2, &bit_score2, &evalue2, &number2 );
374
375 if( (*first_node)->evalue > (*second_node)->evalue ) return -1;
376 else if( (*first_node)->evalue < (*second_node)->evalue ) return 1;
377 else if( score1 > score2 ) return 1;
378 else if( score1 < score2 ) return -1;
379 else if( (*first_node)->subject_id > (*second_node)->subject_id ) return 1;
380 else if( (*first_node)->subject_id < (*second_node)->subject_id ) return -1;
381 else return 0;
382 }
383
384 /* AM: This function divides a list of alignments for the concatenated query into the
385 array of lists of alignments per each original query.
386
387 params:
388
389 options --- program options (used to determine which program is being run).
390 sap --- points to the head of the list of alignments for concatenated query.
391 mult_queries --- points to the structure containing information about individual
392 queries.
393 subjects --- array of structures containing information of subject ids and
394 evalues of seqaligns. This is necessary to ensure correct order
395 of results.
396
397 return value --- array of pointers to the heads of the lists of alignments: one per
398 every original query.
399 */
DivideSeqAligns(BLAST_OptionsBlkPtr options,SeqAlignPtr sap,QueriesPtr mult_queries,MQ_DivideResultsInfoPtr subjects)400 SeqAlignPtrArray LIBCALL DivideSeqAligns PROTO(( BLAST_OptionsBlkPtr options,
401 SeqAlignPtr sap,
402 QueriesPtr mult_queries,
403 MQ_DivideResultsInfoPtr subjects ))
404 {
405 typedef struct _SecondaryNode
406 {
407 SeqAlignPtr sap; /* SeqAlign asociated with this node */
408 SeqAlignPtr primary; /* The primary node asociated with this node */
409 struct _SecondaryNode PNTR next; /* Next element in the list */
410 } SecondaryNode, PNTR SecondaryNodePtr, PNTR PNTR SecondaryNodePtrArray;
411
412 SeqAlignPtrArray result = NULL; /* The resulting array of seqalign lists. Returned by the function */
413 SeqAlignPtr head = NULL,
414 next_sap = NULL;
415 SecondaryNodePtrArray secondary = NULL; /* The array of secondary seqalign lists */
416 SecondaryNodePtr secondary_node_pool = NULL; /* The set of free secondary nodes */
417 SecondaryNodePtr next_free_secondary_node = NULL,
418 new_secondary_node;
419 PrimaryNodePtrArray primary = NULL; /* The array of primary seqalign lists */
420 PrimaryNodePtrArray primaries_per_query; /* Array for heap sort. */
421 PrimaryNodePtr primary_node_pool = NULL; /* The set of free primary nodes */
422 PrimaryNodePtr next_free_primary_node = NULL,
423 new_primary_node;
424 IntArray lengths; /* Array of sized of secondary node lists */
425 Uint1Array new_primary;
426 Uint4 num_queries = 0, /* Total number of original queries */
427 num_seqaligns = 0, /* Number of seqaligns in the input list */
428 start = 0,
429 query = 0;
430 Uint4 count = 0,
431 count1 = 0,
432 primary_array_size = 0; /* The maximum size of a primary node list */
433 SeqIdPtr subject_id = NULL, /* The id of the "current" subject sequence */
434 last_id = NULL; /* Saved id of the previous subject sequence. */
435 SeqLocPtr loc = NULL; /* Sequence location if segs is of type StdSeg */
436
437 num_queries = mult_queries->NumQueries;
438
439 /* Count the seqaligns in the input list */
440 for( head = sap; head; head = head->next ) ++num_seqaligns;
441
442 /* Memory allocation */
443 result = (SeqAlignPtrArray)MemNew( sizeof( SeqAlignPtr )*num_queries );
444 primary = (PrimaryNodePtrArray)MemNew( sizeof( PrimaryNodePtr )*num_queries );
445 primary_node_pool = (PrimaryNodePtr)MemNew( sizeof( PrimaryNode )*num_seqaligns );
446 next_free_primary_node = primary_node_pool;
447 secondary = (SecondaryNodePtrArray)MemNew( sizeof( SecondaryNodePtr )*num_queries );
448 secondary_node_pool = (SecondaryNodePtr)MemNew( sizeof( SecondaryNode )*num_seqaligns );
449 next_free_secondary_node = secondary_node_pool;
450 lengths = (IntArray)MemNew( sizeof( Int4 )*num_queries );
451 new_primary = (Uint1Array)MemNew( sizeof( Uint1 )*num_queries );
452
453 /* Primary and secondary lists creation. */
454 for( count = 0; sap; sap = next_sap )
455 {
456 if( StringCmp( options->program_name, "blastn" ) == 0 )
457 start = *((DenseSegPtr)(sap->segs))->starts;
458 else
459 {
460 loc = ((StdSegPtr)(sap->segs))->loc;
461 start = SeqLocStart( loc );
462 }
463
464 while( mult_queries->WhichQuery[start] <= 0
465 || mult_queries->WhichQuery[start] > num_queries )
466 ++start;
467
468 query = mult_queries->WhichQuery[start] - 1;
469 next_sap = sap->next;
470 subject_id = TxGetSubjectIdFromSeqAlign( sap );
471
472 if( !last_id || SeqIdComp( subject_id, last_id ) != SIC_YES )
473 MemFill( (void *)new_primary, 0, sizeof( Uint1 )*num_queries );
474
475 last_id = subject_id;
476
477 if( !new_primary[query] )
478 {
479 ++lengths[query];
480 new_primary[query] = 1;
481 new_primary_node = next_free_primary_node++;
482 new_primary_node->sap = sap;
483 new_primary_node->evalue = subjects[count].evalue;
484 new_primary_node->subject_id = subjects[count++].subject_id;
485 new_primary_node->next = primary[query];
486 primary[query] = new_primary_node;
487 }
488 else
489 {
490 new_secondary_node = next_free_secondary_node++;
491 new_secondary_node->sap = sap;
492 new_secondary_node->primary = primary[query]->sap;
493 new_secondary_node->next = secondary[query];
494 secondary[query] = new_secondary_node;
495 }
496 }
497
498 /* Allocate primary array for sorting. */
499 for( count = 0; count < num_queries; ++count )
500 if( lengths[count] > primary_array_size )
501 primary_array_size = lengths[count];
502
503 /* primary = (SortPairNodePtr)MemNew( sizeof( SortPairNode )*primary_array_size ); */
504 primaries_per_query = (PrimaryNodePtrArray)MemNew(
505 sizeof( PrimaryNodePtr )*primary_array_size );
506
507 /* Sort the primary arrays, then merge primary and secondary nodes
508 for each query.*/
509 for( count = 0; count < num_queries; ++count )
510 {
511 for( count1 = 0, new_primary_node = primary[count]; new_primary_node;
512 new_primary_node = new_primary_node->next, ++count1 )
513 primaries_per_query[count1] = new_primary_node;
514
515 result[count] = NULL;
516 Nlm_HeapSort( (Nlm_VoidPtr)primaries_per_query, count1,
517 sizeof( PrimaryNodePtr ), CompareSeqAlignPtr );
518
519 for( count1 = 0; count1 < lengths[count]; ++count1 )
520 {
521 primaries_per_query[count1]->sap->next = result[count];
522 result[count] = primaries_per_query[count1]->sap;
523 }
524
525 for( new_secondary_node = secondary[count]; new_secondary_node;
526 new_secondary_node = new_secondary_node->next )
527 {
528 new_secondary_node->sap->next = new_secondary_node->primary->next;
529 new_secondary_node->primary->next = new_secondary_node->sap;
530 }
531
532 MemFill( (void *)primaries_per_query, 0, sizeof( SeqAlignPtr )*primary_array_size );
533 }
534
535 /* Cleanup */
536 MemFree( (void *)primary );
537 MemFree( (void *)primary_node_pool );
538 MemFree( (void *)primaries_per_query );
539 MemFree( (void *)new_primary );
540 MemFree( (void *)lengths );
541 MemFree( (void *)secondary_node_pool );
542 MemFree( (void *)secondary );
543
544 /* Now correct offsets in result. */
545 for( count = 0; count < num_queries; ++count )
546 if( result[count] )
547 {
548 sap = result[count];
549
550 while( sap )
551 {
552 SeqIdPtr tmpid = NULL;
553
554 if( StringCmp( options->program_name, "blastn" ) == 0 )
555 {
556 Uint4 i = 0;
557 DenseSegPtr dsp = (DenseSegPtr)(sap->segs);
558
559 for( ; i < (dsp->dim)*(dsp->numseg); i += dsp->dim )
560 {
561 start = dsp->starts[i];
562
563 if( start != -1 )
564 start = start - mult_queries->QueryStarts[count];
565
566 dsp->starts[i] = start;
567 }
568
569 tmpid = SeqIdDup( mult_queries->FakeBsps[count]->id );
570 tmpid->next = dsp->ids->next;
571 SeqIdFree( dsp->ids );
572 dsp->ids = tmpid;
573 }
574 else
575 {
576 Uint4 end = 0, len;
577 SeqLocPtr loc, newloc = NULL;
578 StdSegPtr ssp = (StdSegPtr)(sap->segs);
579 SeqIdPtr newid = NULL;
580 tmpid = mult_queries->FakeBsps[count]->id;
581
582 while( ssp )
583 {
584 loc = ssp->loc;
585 start = SeqLocStart( loc );
586
587 if( start != -1 )
588 {
589 end = SeqLocStop( loc );
590 len = end - start;
591 start = start - mult_queries->QueryStarts[count];
592 end = start + len;
593 newloc = SeqLocIntNew( start, end, SeqLocStrand( loc ), tmpid );
594 }
595 else
596 {
597 newloc = ValNodeNew( NULL );
598 newloc->choice = SEQLOC_EMPTY;
599 newloc->data.ptrvalue = SeqIdDup( tmpid );
600 }
601
602 newid = SeqIdDup( tmpid );
603 newid->next = ssp->ids->next;
604 SeqIdFree( ssp->ids );
605 ssp->ids = newid;
606 newloc->next = loc->next;
607 ssp->loc = newloc;
608 SeqLocFree( loc );
609 ssp = ssp->next;
610 }
611 }
612
613 sap = sap->next;
614 }
615 }
616
617 return result;
618 }
619
620 /* BlastMakeFakeBspConcat
621 Given an array of fake_bsps, this function creates a new fake_bsp. The data
622 field of the new bsp is constructed using the concatenated sequences of the
623 bsps in the array, in the same order as they appear in the array.
624 Protein sequences are concatenated directly.
625 Nucleotide sequences are encoded with two per byte, so they are concatenated
626 byte-wise.
627 Parameters: bsp_arr, the array of fake_bsps from which sequences are to be pulled
628 num_bsps, the maximum number of bsps in the array to use
629 is_na: true if query is nucleotide, false if amino acid seq
630 num_spacers: the number of spacer characters between the original
631 sequences in the concatenated query. (AM:)
632 note: assumes that the array bsp_arr has at least num_bsp elements!
633
634 AM: Changes to align on the byte boundaries.
635 */
636 BioseqPtr LIBCALL
BlastMakeFakeBspConcat(BspArray bsp_arr,Uint4 num_bsps,Boolean is_na,Uint4 num_spacers)637 BlastMakeFakeBspConcat PROTO((BspArray bsp_arr, Uint4 num_bsps, Boolean is_na, Uint4 num_spacers)){
638 BioseqPtr tot;
639 Int4 bsp_iter, letter_iter;
640 /* "compact" len refers to #bytes to store the seq, not to #of letters */
641 Int4 tot_len, tot_compact_len, spacer_compact_len;
642 Int4 indiv_len; /*keep an Int4 to satisfy BlastGetSequenceFromBioseq */
643 Uint4 total_written;
644 BioseqPtr curr_bsp;
645 Uint1Ptr indiv_seq = NULL, concat_seq;
646 ByteStorePtr byte_store;
647 Uint1 PNTR spacer_seq_nuc, PNTR spacer_seq_prot;
648 SeqLocPtr slp = NULL;
649 SeqPortPtr spp = NULL;
650 Uint1 residue, shift = 0;
651 Int4 index = 0;
652 Uint1 newcode = Seq_code_ncbi4na, oldcode;
653 ByteStorePtr from = NULL,
654 to = NULL;
655
656 /* AM: Determine the size and allocate spacer arrays. */
657 if( is_na )
658 {
659 num_spacers = (num_spacers + 1)/2;
660
661 /* Recode all BSPs to NCBI4na */
662 for( bsp_iter = 0; bsp_iter < num_bsps; ++bsp_iter )
663 {
664 oldcode = bsp_arr[bsp_iter]->seq_data_type;
665 if (oldcode == Seq_code_gap) continue;
666 from = (ByteStorePtr) bsp_arr[bsp_iter]->seq_data;
667
668 if( !(to = BSConvertSeq( from, newcode, oldcode,
669 bsp_arr[bsp_iter]->length )) )
670 return NULL;
671
672 bsp_arr[bsp_iter]->seq_data_type = newcode;
673 bsp_arr[bsp_iter]->seq_data = (SeqDataPtr) to;
674 }
675 }
676
677 spacer_seq_nuc = (Uint1 PNTR)MemNew( sizeof( Uint1 )*(num_spacers + 1) );
678 spacer_seq_prot = spacer_seq_nuc;
679
680 /* create the spacer sequences to insert between sequences */
681 spacer_compact_len = num_spacers;
682
683 for( letter_iter = 0; letter_iter < spacer_compact_len; ++letter_iter )
684 if( is_na ) spacer_seq_nuc[letter_iter] = TWO_N;
685 else spacer_seq_prot[letter_iter] = 'X';
686
687 spacer_seq_nuc[letter_iter] = 0;
688 tot = BioseqNew();
689
690 if (tot == NULL)
691 return NULL;
692
693 /* initialize the fake_bsp with fields of the first bsp in the array */
694 tot = (BioseqPtr) BlastMakeFakeBioseq( (BioseqPtr) *bsp_arr, NULL);
695
696 /* find total length of letters */
697 tot_len = 0;
698
699 for( bsp_iter=0; bsp_iter<num_bsps; bsp_iter++ )
700 {
701 curr_bsp = *(bsp_arr+bsp_iter);
702 tot_len += curr_bsp->length;
703
704 if( is_na && bsp_iter != num_bsps - 1 && curr_bsp->length%2 ) ++tot_len;
705 }
706
707 /* compact length indicates actual number of bytes in representation */
708 tot_compact_len = (is_na) ? (tot_len + 1)/2 : tot_len;
709 tot_compact_len += spacer_compact_len * (num_bsps-1);
710
711 /* total len is in terms of number of letters */
712 tot_len += ( (is_na) ? num_spacers*2 : num_spacers) * (num_bsps-1);
713
714 concat_seq = MemNew((1 + tot_compact_len) * sizeof(Uint1));
715 concat_seq[0] = 0;
716
717 if (concat_seq == NULL)
718 return NULL;
719
720 total_written = 0;
721
722 for( bsp_iter = 0; bsp_iter < num_bsps; ++bsp_iter )
723 {
724 curr_bsp = bsp_arr[bsp_iter];
725 indiv_len = curr_bsp->length;
726
727 if( is_na )
728 {
729 /* indiv_seq = (Uint1Ptr)(bsp_arr[bsp_iter]->seq_data->chain->str); */
730 indiv_len = (indiv_len + 1)/2;
731 indiv_seq = MemNew( (1 + indiv_len)*sizeof( Uint1 ) );
732
733 if( !indiv_seq ) return NULL;
734
735 slp = NULL;
736 index = 0;
737 shift = 0;
738 spp = SeqPortNew( bsp_arr[bsp_iter], 0, -1, Seq_strand_plus, Seq_code_ncbi4na );
739
740 while( (residue = SeqPortGetResidue( spp )) != SEQPORT_EOF )
741 {
742 if( shift )
743 {
744 indiv_seq[index/2] = ((indiv_seq[index/2])&0xF0);
745 residue = (residue&0xF);
746 indiv_seq[index/2] |= residue;
747 }
748 else
749 {
750 indiv_seq[index/2] = 0;
751 residue = (residue << 4);
752 indiv_seq[index/2] = residue;
753 }
754
755 shift = 1 - shift;
756 ++index;
757 }
758
759 indiv_seq[indiv_len] = 0;
760 spp = SeqPortFree( spp );
761
762 if( bsp_iter )
763 {
764 StrCat( (CharPtr) concat_seq, (CharPtr) spacer_seq_nuc );
765 total_written += num_spacers;
766 }
767
768 StrCat( (CharPtr) concat_seq, (CharPtr) indiv_seq );
769 total_written += indiv_len;
770
771 if( curr_bsp->length%2 )
772 {
773 concat_seq[total_written - 1] = concat_seq[total_written - 1]&0xF0;
774 concat_seq[total_written - 1] = concat_seq[total_written - 1]|LAST_N;
775 }
776
777 indiv_seq = MemFree( indiv_seq );
778 }
779 else
780 {
781 indiv_seq = (Uint1Ptr)BlastGetSequenceFromBioseq( bsp_arr[bsp_iter], &indiv_len );
782
783 if( bsp_iter ) StrCat( (CharPtr) concat_seq, (CharPtr) spacer_seq_prot );
784
785 StrCat( (CharPtr) concat_seq, (CharPtr) indiv_seq );
786 indiv_seq = MemFree(indiv_seq);
787 }
788 }
789
790 byte_store = BSNew(0);
791 BSWrite(byte_store, (VoidPtr) concat_seq, tot_compact_len);
792
793 tot->seq_data = (SeqDataPtr) byte_store;
794 tot->length = tot_len;
795
796 MemFree(concat_seq);
797 MemFree( (void *)spacer_seq_nuc );
798 return (tot);
799 }
800
801 /* Duplicates Queries structure for use with multiple threads. */
BlastDuplicateMultQueries(QueriesPtr source)802 QueriesPtr LIBCALL BlastDuplicateMultQueries PROTO(( QueriesPtr source ))
803 {
804 QueriesPtr result = NULL;
805 result = (QueriesPtr) MemNew (sizeof (Queries) );
806
807 if( !result ) return NULL;
808
809 result->NumQueries = source->NumQueries;
810 result->TotalLength = source->TotalLength;
811 result->FakeBsps = source->FakeBsps;
812 result->LCaseMasks = source->LCaseMasks;
813 result->QueryStarts = source->QueryStarts;
814 result->QueryEnds = source->QueryEnds;
815 result->WhichQuery = source->WhichQuery;
816 result->WhichPos = source->WhichPos;
817 result->EffLengths = source->EffLengths;
818 result->Adjustments = source->Adjustments;
819 result->SearchSpEff = source->SearchSpEff;
820 result->DbLenEff = source->DbLenEff;
821 result->MinLen = source->MinLen;
822 result->MinLenEff = source->MinLenEff;
823 result->MinDbLenEff = source->MinDbLenEff;
824 result->MinSearchSpEff = source->MinSearchSpEff;
825 result->LambdaMin = source->LambdaMin;
826 result->LambdaMax = source->LambdaMax;
827 result->LogKMin = source->LogKMin;
828 result->LogKMax = source->LogKMax;
829 result->HitListArray = NULL;
830 result->result_info = source->result_info;
831 result->max_results_per_query = source->max_results_per_query;
832 result->dropoff_2nd_pass_array = source->dropoff_2nd_pass_array;
833 result->lambda_array = source->lambda_array;
834 result->sap_array_data = source->sap_array_data;
835 return result;
836 }
837
BlastMultQueriesDestruct(QueriesPtr queries)838 QueriesPtr LIBCALL BlastMultQueriesDestruct PROTO(( QueriesPtr queries ))
839 {
840 Int4 i;
841
842 if( queries ) {
843 for( i = 0; i < queries->NumQueries; ++i ) {
844 MemFree( queries->result_info[i].results );
845
846 if( queries->HitListArray )
847 BlastHitListDestruct( queries->HitListArray[i] );
848 }
849
850 if( queries->sap_array_data )
851 MemFree( queries->sap_array_data->sap_array );
852
853 MemFree( queries->WhichPos );
854 MemFree( queries->WhichQuery );
855 MemFree( queries->sap_array_data );
856 MemFree( queries->dropoff_2nd_pass_array );
857 MemFree( queries->lambda_array );
858 MemFree( queries->result_info );
859 MemFree( queries->EffLengths );
860 MemFree( queries->Adjustments );
861 MemFree( queries->DbLenEff );
862 MemFree( queries->SearchSpEff );
863 MemFree( queries->QueryEnds );
864 MemFree( queries->QueryStarts );
865 MemFree( queries->FakeBsps );
866 MemFree( queries->HitListArray );
867 }
868
869 return MemFree( queries );
870 }
871
872 /* BlastMakeMultQueries
873 Given an array of fake_bsps corresponding to individual BLAST queries, returns
874 a pointer to a Queries structure containing information about the individual
875 queries.
876 Parameters: bsp_arr, the array of fake_bsps
877 num_queries, the maximum number of fake_bsps from the array to process
878 note: assumes that bsp_arr has minimum num_queries elements
879 Arrays created giving information about query number and position within
880 individual query for each location on the concatenated sequence are
881 based on numbering beginning with 1, not 0 for queries and positions
882 */
883 QueriesPtr LIBCALL
BlastMakeMultQueries(BspArray bsp_arr,Uint4 num_queries,Boolean is_na,Uint4 num_spacers,SeqLocPtr PNTR lcase_mask_arr)884 BlastMakeMultQueries PROTO((BspArray bsp_arr, Uint4 num_queries, Boolean is_na, Uint4 num_spacers, SeqLocPtr PNTR lcase_mask_arr))
885 {
886 QueriesPtr queries;
887 Int4 bsp_iter, pos_iter;
888 BioseqPtr curr_bsp;
889 Int4 curr_index;
890 Int4 curr_length;
891 IntArray starts, ends;
892 Int4 query, pos, query_holder;
893 Int4 *which_query;
894 Int4 *which_pos;
895
896 queries = (QueriesPtr) MemNew (sizeof (Queries) );
897
898 queries->NumQueries = num_queries;
899 queries->FakeBsps = bsp_arr;
900 queries->LCaseMasks = lcase_mask_arr;
901
902 /* AM: These has to be dynamically allocated (bug fix) */
903 starts = (IntArray)MemNew( sizeof( Int4 )*num_queries );
904 ends = (IntArray)MemNew( sizeof( Int4 )*num_queries );
905
906 /* AM: Inserted allocation of effective lengths, adjustment, search
907 space size, and dblen arrays. */
908 queries->SearchSpEff = (FloatArray)MemNew( sizeof( Nlm_FloatHi )*num_queries );
909 queries->DbLenEff = (Int8Array)MemNew( sizeof( Int8 )*num_queries );
910 queries->Adjustments = (IntArray)MemNew( sizeof( Int4 )*num_queries );
911 queries->EffLengths = (IntArray)MemNew( sizeof( Int4 )*num_queries );
912 queries->result_info = (MQ_ResultInfoPtr)MemNew( sizeof( MQ_ResultInfo )*num_queries );
913 queries->lambda_array = (Nlm_FloatHi PNTR)MemNew( sizeof( Nlm_FloatHi )*num_queries );
914 queries->dropoff_2nd_pass_array
915 = (BLAST_Score PNTR)MemNew( sizeof( BLAST_Score )*num_queries );
916 queries->sap_array_data = (SapArrayDataPtr)MemNew( sizeof( SapArrayData ) );
917
918 queries->MinLen = INT4_MAX;
919 queries->MinLenEff = INT4_MAX;
920 queries->MinDbLenEff = INT8_MAX;
921
922 queries->HitListArray = NULL;
923 queries->use_mq = FALSE;
924
925 /* determine and record query starts and ends in the concatenated query */
926 curr_index = 0;
927 for (bsp_iter=0; bsp_iter<num_queries; bsp_iter++) {
928 curr_bsp = *(bsp_arr + bsp_iter);
929 curr_length = curr_bsp->length;
930 starts[bsp_iter] = curr_index;
931 curr_index += curr_length;
932 ends[bsp_iter] = curr_index - 1;
933 curr_index += num_spacers;
934
935 if( is_na && bsp_iter != num_queries - 1 && curr_length%2 ) ++curr_index;
936 }
937 curr_index -= num_spacers;
938 /* now curr_index = total number of letters in concat. query */
939 queries->QueryStarts = starts;
940 queries->QueryEnds = ends;
941
942 /* create arrays showing querynum/pos in indiv. query for each letter in the
943 concatenated query sequence
944 assumes well-formed starts/ends arrays */
945 which_query = MemNew (sizeof(Int4) * curr_index);
946 which_pos = MemNew (sizeof(Int4) * curr_index);
947 query = query_holder = 1;
948 pos = 1;
949 for (pos_iter=0; pos_iter<curr_index; pos_iter++) {
950 /* zero-th letter in seq will be query 1, pos 1 */
951 *(which_query + pos_iter) = query;
952 *(which_pos + pos_iter) = pos;
953
954 if (ends[query_holder-1] == pos_iter) {
955 query_holder++;
956 query = SPACER_POS;
957 pos = 1;
958 } else if (starts[query_holder-1] == pos_iter) {
959 query = query_holder;
960 pos = 1;
961 } else {
962 pos++;
963 }
964 }
965 queries->WhichQuery = which_query;
966 queries->WhichPos = which_pos;
967 return queries;
968 }
969
970 /* ConcatSeqLoc():
971
972 Takes a SeqLoc of type SEQLOCK_PACKED_INT that has offsets local to
973 individual queries and returns a new SeqLoc with offsets relative to
974 the concatenated query.
975
976 */
ConcatSeqLoc(QueriesPtr mult_queries,SeqLocPtr loc,SeqIdPtr id,Uint4 qnum)977 SeqLocPtr LIBCALL ConcatSeqLoc PROTO(( QueriesPtr mult_queries, SeqLocPtr loc, SeqIdPtr id, Uint4 qnum ))
978 {
979 SeqLocPtr result = NULL;
980 SeqLocPtr current = NULL, newloc = NULL, last = NULL;
981 Uint4 start, end;
982
983 if( mult_queries && loc->choice == SEQLOC_PACKED_INT )
984 {
985 result = ValNodeNew( NULL );
986 result->choice = loc->choice;
987 current = (SeqLocPtr)loc->data.ptrvalue;
988
989 while( current )
990 {
991 start = SeqLocStart( current );
992 end = SeqLocStop( current );
993 start = mult_queries->QueryStarts[qnum] + start;
994 end = mult_queries->QueryStarts[qnum] + end;
995 newloc = SeqLocIntNew( start, end, SeqLocStrand( loc ), id );
996
997 if( last )
998 last->next = newloc;
999 else
1000 result->data.ptrvalue = (void *)newloc;
1001
1002 last = newloc;
1003 current = current->next;
1004 }
1005 }
1006
1007 return result;
1008 }
1009
1010 /* InitHitLists():
1011
1012 This function breaks search->current_hitlist into multiple lists for
1013 each query and fills in the structures in search->mult_queries.
1014
1015 */
InitHitLists(BlastSearchBlkPtr search)1016 void LIBCALL InitHitLists PROTO(( BlastSearchBlkPtr search ))
1017 {
1018 QueriesPtr queries;
1019 BLAST_HitListPtr * hitlist_array;
1020 BLAST_HitListPtr current_hitlist;
1021 BLAST_HSPPtr hsp;
1022 Uint4 i, query_num;
1023
1024 if( !search ) return;
1025
1026 queries = search->mult_queries;
1027 current_hitlist = search->current_hitlist;
1028
1029 if( !queries || !current_hitlist ) return;
1030
1031 hitlist_array = queries->HitListArray;
1032
1033 if( !hitlist_array )
1034 {
1035 hitlist_array = (BLAST_HitListPtr *)MemNew(
1036 sizeof( BLAST_HitListPtr )*queries->NumQueries );
1037 queries->HitListArray = hitlist_array;
1038
1039 for( i = 0; i < queries->NumQueries; ++i )
1040 hitlist_array[i] = BlastHitListNew( search );
1041 }
1042
1043 for( i = 0; i < queries->NumQueries; ++i )
1044 {
1045 BlastHitListPurge( hitlist_array[i] );
1046 hitlist_array[i]->next = NULL;
1047 hitlist_array[i]->hspmax = current_hitlist->hspmax;
1048 hitlist_array[i]->hspcnt = 0;
1049 hitlist_array[i]->hspcnt_max = current_hitlist->hspcnt_max;
1050 hitlist_array[i]->further_process = current_hitlist->further_process;
1051 hitlist_array[i]->do_not_reallocate = current_hitlist->do_not_reallocate;
1052 MemFree( hitlist_array[i]->lh_helper );
1053 hitlist_array[i]->lh_helper = NULL;
1054 hitlist_array[i]->lh_helper_size = current_hitlist->lh_helper_size;
1055 MemFree( hitlist_array[i]->hsp_array );
1056 hitlist_array[i]->hsp_array = (BLAST_HSPPtr PNTR)MemNew(
1057 sizeof( BLAST_HSPPtr )*hitlist_array[i]->hspmax );
1058 }
1059
1060 for( i = 0; i < current_hitlist->hspcnt; ++i )
1061 {
1062 hsp = current_hitlist->hsp_array[i];
1063
1064 if( hsp )
1065 {
1066 query_num = GetQueryNum( queries, hsp->query.offset,
1067 hsp->query.end, hsp->query.frame );
1068 hitlist_array[query_num]->hsp_array[hitlist_array[query_num]->hspcnt] = hsp;
1069 current_hitlist->hsp_array[i] = NULL;
1070 ++hitlist_array[query_num]->hspcnt;
1071 }
1072 }
1073 }
1074 /* TEMPORARY FUNCTION - FOR DEBUGGING ONLY */
MQ_CheckLists(BLASTResultHitlistPtr PNTR local,Int4 lsize,BLASTResultHitlistPtr PNTR res,Int4 rsize)1075 void LIBCALL MQ_CheckLists PROTO(( BLASTResultHitlistPtr PNTR local, Int4 lsize,
1076 BLASTResultHitlistPtr PNTR res, Int4 rsize ))
1077 {
1078 Int4 i, j;
1079
1080 for( i = 0; i < lsize; ++i )
1081 {
1082 int found = 0;
1083
1084 for( j = 0; j < rsize; ++j )
1085 if( local[i] == res[j] )
1086 {
1087 found = 1;
1088 break;
1089 }
1090
1091 if( !found )
1092 {
1093 fprintf( stderr, "ERROR: %d is not in res\n", i );
1094 return;
1095 }
1096 }
1097 }
1098
1099 /* ResultIndex1():
1100
1101 This function first calls the binary search via ResultIndex(). Then tries
1102 to locate the exact object in the array by pointer.
1103
1104 */
ResultIndex1(BLASTResultHitlistPtr ptr,BLASTResultHitlistPtr PNTR results,Int4 num_elements)1105 Int4 LIBCALL ResultIndex1 PROTO(( BLASTResultHitlistPtr ptr,
1106 BLASTResultHitlistPtr PNTR results,
1107 Int4 num_elements ))
1108 {
1109 Int4 del_index = ResultIndex( ptr->best_evalue, ptr->high_score,
1110 ptr->subject_id, results, num_elements );
1111 ASSERT( del_index < num_elements );
1112
1113 if( results[del_index] == ptr ) return del_index;
1114
1115 while( del_index >= 0 && results[del_index]->subject_id == ptr->subject_id )
1116 --del_index;
1117
1118 while( ++del_index < num_elements && results[del_index] != ptr );
1119
1120 ASSERT( del_index < num_elements );
1121
1122 return del_index;
1123 }
1124
1125 /* ResultIndex():
1126
1127 This function performs a binary search in results for an element with
1128 goven evalue, score, and subject id. The number of elements in results
1129 is given by num_elements.
1130
1131 */
ResultIndex(Nlm_FloatHi target_e,Int4 target_score,Int4 subject_id,BLASTResultHitlistPtr PNTR results,Int4 num_elements)1132 Int4 LIBCALL ResultIndex PROTO(( Nlm_FloatHi target_e, Int4 target_score, Int4 subject_id,
1133 BLASTResultHitlistPtr PNTR results,
1134 Int4 num_elements ))
1135 {
1136 Int4 high_index, low_index, new_index, old_index;
1137
1138 if( !num_elements ) return 0;
1139
1140 high_index = 0;
1141 low_index = num_elements - 1;
1142 new_index = (high_index + low_index)/2;
1143 old_index = new_index;
1144
1145 while( 1 )
1146 {
1147 if( results[new_index]->best_evalue > target_e )
1148 low_index = new_index;
1149 else if( results[new_index]->best_evalue < target_e )
1150 high_index = new_index;
1151 else if( results[new_index]->high_score < target_score )
1152 low_index = new_index;
1153 else if( results[new_index]->high_score > target_score )
1154 high_index = new_index;
1155 else if( results[new_index]->subject_id < subject_id )
1156 low_index = new_index;
1157 else high_index = new_index;
1158
1159 new_index = (high_index + low_index)/2;
1160
1161 if( old_index == new_index )
1162 {
1163 while( new_index < num_elements )
1164 if( results[new_index]->best_evalue < target_e ) ++new_index;
1165 else if( results[new_index]->best_evalue == target_e
1166 && results[new_index]->high_score > target_score )
1167 ++new_index;
1168 else if( results[new_index]->best_evalue == target_e
1169 && results[new_index]->high_score == target_score
1170 && results[new_index]->subject_id > subject_id )
1171 ++new_index;
1172 else break;
1173
1174 break;
1175 }
1176
1177 old_index = new_index;
1178 }
1179
1180 return new_index;
1181 }
1182
1183 /* MQ_UpdateResultLists():
1184
1185 Updates the individual hitlists in mult_queries removing
1186 unreferenced elements.
1187
1188 */
MQ_UpdateResultLists(QueriesPtr mult_queries)1189 void LIBCALL MQ_UpdateResultLists PROTO(( QueriesPtr mult_queries ))
1190 {
1191 Uint4 qnum, index, new_index = 0;
1192 MQ_ResultInfoPtr result_info;
1193
1194 for( qnum = 0; qnum < mult_queries->NumQueries; ++qnum )
1195 {
1196 result_info = mult_queries->result_info + qnum;
1197
1198 for( index = 0; index < result_info->NumResults; ++index )
1199 if( result_info->results[index]->num_ref > 0 )
1200 result_info->results[new_index++] = result_info->results[index];
1201
1202 result_info->NumResults = new_index;
1203 }
1204 }
1205
1206