1 static char const rcsid[] = "$Id: blastconcat.c,v 1.14 2007/05/07 13:30:54 kans Exp $";
2 
3 /* ===========================================================================
4 *
5 *                            PUBLIC DOMAIN NOTICE
6 *               National Center for Biotechnology Information
7 *
8 *  This software/database is a "United States Government Work" under the
9 *  terms of the United States Copyright Act.  It was written as part of
10 *  the author's official duties as a United States Government employee and
11 *  thus cannot be copyrighted.  This software/database is freely available
12 *  to the public for use. The National Library of Medicine and the U.S.
13 *  Government have not placed any restriction on its use or reproduction.
14 *
15 *  Although all reasonable efforts have been taken to ensure the accuracy
16 *  and reliability of the software and data, the NLM and the U.S.
17 *  Government do not and cannot warrant the performance or results that
18 *  may be obtained by using this software or data. The NLM and the U.S.
19 *  Government disclaim all warranties, express or implied, including
20 *  warranties of performance, merchantability or fitness for any particular
21 *  purpose.
22 *
23 *  Please cite the author in any work or product based on this material.
24 *
25 * ===========================================================================*/
26 /*****************************************************************************
27 
28 File name: blastconcat.c
29 
30 Author: Karolina Maciag, Aleksandr Morgulis
31 
32 Contents: implementation of functions needed for query multiplexing
33           functionality.
34 
35 ******************************************************************************/
36 /* $Revision: 1.14 $
37 *  $Log: blastconcat.c,v $
38 *  Revision 1.14  2007/05/07 13:30:54  kans
39 *  added casts for Seq-data.gap (SeqDataPtr, SeqGapPtr, ByteStorePtr)
40 *
41 *  Revision 1.13  2007/03/13 20:39:35  madden
42 *   - In GetNumSpacers, change the type the local variables
43 *     dropoff_1st_pass_bits, dropoff_2nd_pass_bits, gap_x_dropoff_bits,
44 *     and gap_x_dropoff_final_bits to Nlm_FloatHi as all these values are
45 *     properly floating point values and are floating point values in the
46 *     options structure.
47 *   - In GetNumSpacers, don't case DROPOFF_NUMBER_OF_BITS to integer.
48 *   [from Mike Gertz]
49 *
50 *  Revision 1.12  2005/10/06 12:52:23  madden
51 *  Changes to support correct gapped stats for blastn
52 *
53 *  Revision 1.11  2005/09/26 15:02:58  morgulis
54 *  Fixing some memort leaks when using query concatenation in blastn and tblastn.
55 *
56 *  Revision 1.10  2005/01/10 18:52:29  coulouri
57 *  fixes from morgulis to allow concatenation of >255 queries in [t]blastn
58 *
59 *  Revision 1.9  2004/04/21 19:25:04  coulouri
60 *  do not cast lvalues
61 *
62 *  Revision 1.8  2004/04/21 13:54:23  camacho
63 *  Remove unused variable
64 *
65 *  Revision 1.7  2004/04/20 14:55:47  morgulis
66 *  1. Fixed query offsets in results when -B option is used.
67 *  2. Fixes for lower case masking handling with -B option.
68 *
69 *  Revision 1.6  2004/02/26 15:52:29  papadopo
70 *  Mike Gertz' modifications to unify handling of gapped Karlin blocks between protein and nucleotide searches
71 *
72 *  Revision 1.5  2003/12/29 15:42:46  coulouri
73 *  tblastn query concatenation fixes from morgulis
74 *
75 *  Revision 1.4  2003/05/30 17:25:36  coulouri
76 *  add rcsid
77 *
78 *  Revision 1.3  2003/03/25 15:02:11  madden
79 *  Code cleanup from A. Morgulis
80 *
81 *  Revision 1.2  2003/03/24 21:16:03  madden
82 *  Cast before calling StrCat
83 *
84 *  Revision 1.1  2003/03/24 20:47:28  madden
85 *  Utilities for concatenation of blastn/tblastn queries
86 *
87 * */
88 
89 #include <ncbi.h>
90 #include <objalign.h>
91 #include <objseq.h>
92 
93 #ifndef _BLASTCONCAT_
94 #include <blastconcat.h>
95 #endif
96 
97 #include <blastpri.h>
98 #include <blastdef.h>
99 #include <blastconcatdef.h>
100 
101 /* AM: The following function returns the minimal number of spacers necessary to
102        create the concatenated query.
103 
104        Parameters:
105 
106          options	The option block created based on blast command line arguments.
107 	 believe_query	Flag indicating whether to believe the original bsp;
108          fake_bsp_arr	The array of queries to be concatenated
109 */
GetNumSpacers(BLAST_OptionsBlkPtr options,Boolean believe_query,BspArray fake_bsp_arr)110 Uint4 LIBCALL GetNumSpacers PROTO(( BLAST_OptionsBlkPtr options, Boolean believe_query,
111                       BspArray fake_bsp_arr ))
112 {
113   Nlm_FloatHi               dropoff_1st_pass_bits,	/* Dropoff ("X") used for the first pass (bits) */
114                      dropoff_2nd_pass_bits,     /* Dropoff ("X") used for the second pass (bits ) */
115                      gap_x_dropoff_bits,	/* Dropoff ("X") used for gapped alignments (bits) */
116                      gap_x_dropoff_final_bits;  /* Dropoff ("X") used for final gapped alignment (bits) */
117 Int4		     query_length,		/* The length of the first query in fake_bsp_arr. */
118 		     query_loc_start,           /* Index of the start of the query sequence */
119 		     index,
120 		     i;
121   BLAST_Score        dropoff_1st_pass, 		/* Dropoff ("X") used for the first pass */
122                      dropoff_2nd_pass,          /* Dropoff ("X") used for the second pass */
123                      gap_x_dropoff,  		/* Dropoff ("X") used for gapped alignments */
124 	             gap_x_dropoff_final,      	/* Dropoff ("X") used for final gapped alignment */
125 	             X_score = 0,		/* The score of the ambiguous match */
126 	             max_dropoff;       	/* max of dropoff_1st_pass, dropoff_2nd_pass, gap_x_dropoff, gap_x_dropoff_final */
127   Nlm_FloatHi        Lambda;			/* Lambda parameter (used here for converting scores from bit scores) */
128   BLAST_KarlinBlkPtr kbp,			/* Pointer to blast parameter block (contains Lambda)*/
129 	             kbp_gap;
130   SeqLocPtr          query_slp,			/* Sequence location corresponding to the whole fake_bsp_arr[0] */
131                      private_slp;		/* Sequence location constructed based on the query strand */
132   BioseqPtr	     fake_bsp, 			/* Constructed if the original query can not be trusted */
133                      query_bsp;			/* == fake_bsp_arr[0] */
134   Int2		     first_context,		/* First context of the sequence */
135                      last_context,		/* Last context of the sequence */
136 		     status;
137   Uint1		     alphabet,			/* Sequence alphabet identifier */
138                      strand,			/* Sequence strands (taken from options) */
139 		     residue;			/* Temporary used as an iterator in the loop over the letters in the sequence */
140   BLAST_ScoreBlkPtr  sbp;			/* Score block constructed from the sequence */
141   SeqPortPtr	     spp = NULL;		/* Sequence port for reading sequence content */
142   Uint1Ptr	     query_seq = NULL,		/* Points to the sequence data */
143                      query_seq_start = NULL;    /* Points to the actual start of the sequence data */
144   BLAST_ScorePtr     PNTR matrix;		/* Blast parameter matrix (contained in sbp) */
145   ValNodePtr         error_return = NULL;
146 
147   query_bsp = fake_bsp_arr[0];
148 
149   if( believe_query ) fake_bsp = query_bsp;
150   else fake_bsp = BlastMakeFakeBioseq( query_bsp, NULL );
151 
152   query_slp = NULL;
153   ValNodeAddPointer( &query_slp, SEQLOC_WHOLE,
154                      SeqIdDup( SeqIdFindBest( fake_bsp->id, SEQID_GI )));
155 
156   BlastGetFirstAndLastContext( options->program_name, query_slp,
157                                &first_context, &last_context,
158 			       options->strand_option );
159 
160   query_length = SeqLocLen( query_slp );
161 
162   if( StringCmp( options->program_name, "blastn" ) == 0 )
163     alphabet = BLASTNA_SEQ_CODE;
164   else alphabet = Seq_code_ncbistdaa;
165 
166   sbp = BLAST_ScoreBlkNew( alphabet, last_context + 1 );
167 
168   if( StringCmp( options->program_name, "blastn" ) != 0 )
169   {
170     sbp->read_in_matrix = TRUE;
171     BlastScoreSetAmbigRes( sbp, 'X' );
172   }
173   else
174   {
175     if( options->matrix != NULL && *(options->matrix) != NULLB )
176       sbp->read_in_matrix = TRUE;
177     else sbp->read_in_matrix = FALSE;
178 
179     BlastScoreSetAmbigRes( sbp, 'N' );
180   }
181 
182   sbp->penalty = options->penalty;
183   sbp->reward = options->reward;
184   sbp->query_length = query_length;
185 
186   if( options->matrix != NULL )
187     status = BlastScoreBlkMatFill( sbp, options->matrix );
188   else status = BlastScoreBlkMatFill( sbp, "BLOSUM62" );
189 
190   if( status != 0 )
191   {
192     ErrPostEx( SEV_WARNING, 0, 0,
193                "BlastScoreBlkMatFill returned non-zero status" );
194   }
195 
196   query_loc_start = SeqLocStart( query_slp );
197   strand = SeqLocStrand( query_slp );
198 
199   if( strand == Seq_strand_unknown
200       || strand == Seq_strand_plus
201       || strand == Seq_strand_both )
202     private_slp = SeqLocIntNew( query_loc_start, SeqLocStop( query_slp ),
203                                 Seq_strand_plus, SeqLocId( query_slp ) );
204   else
205     private_slp = SeqLocIntNew( query_loc_start, SeqLocStop( query_slp ),
206                                 Seq_strand_minus, SeqLocId( query_slp ) );
207 
208   if( StringCmp( options->program_name, "tblastn" ) == 0 )
209   {
210     spp = SeqPortNewByLoc( private_slp, Seq_code_ncbistdaa );
211     SeqPortSet_do_virtual( spp, TRUE );
212   }
213   else
214   {
215       spp = SeqPortNewByLoc( private_slp, Seq_code_ncbi4na );
216       SeqPortSet_do_virtual( spp, TRUE );
217   }
218 
219   if( spp )
220   {
221     query_seq_start = (Uint1Ptr)MemNew( 2*(query_length + 2)*sizeof( Char ) );
222     query_seq_start[0] = NULLB;
223     query_seq = query_seq_start + 1;
224     index = 0;
225 
226     while( (residue = SeqPortGetResidue( spp )) != SEQPORT_EOF )
227       if( IS_residue( residue ) )
228       {
229         if( residue == 24 ) residue = 21;
230 
231         query_seq[index] = residue;
232 	++index;
233       }
234 
235     query_seq[index] = NULLB;
236     spp = SeqPortFree( spp );
237 
238     if( StringCmp( options->program_name, "blastn" ) == 0 )
239       for( index = 0; index <= query_length + 1; ++index )
240         query_seq_start[index] = ncbi4na_to_blastna[query_seq_start[index]];
241   }
242 
243 
244   if( StringCmp( options->program_name, "blastn" ) == 0 )
245   {
246     BlastKarlinBlkNuclGappedCalc(sbp->kbp_gap_std[first_context], options->gap_open, options->gap_extend, options->reward, options->penalty, sbp->kbp_std[first_context], &(sbp->round_down), &error_return);
247 
248     if( first_context == 0 )
249       BlastScoreBlkFill( sbp, (CharPtr)query_seq_start, query_length, 0 );
250 
251     if( last_context == 1 )
252       BlastScoreBlkFill( sbp, (CharPtr)query_seq_start, query_length, 1 );
253     /* EMG: kbp and kbp_gap both appear to be unused in this routine */
254     sbp->kbp_gap = sbp->kbp_gap_std;
255     sbp->kbp = sbp->kbp_std;
256     kbp     = sbp->kbp[first_context];
257     kbp_gap = sbp->kbp[first_context];
258     Lambda = kbp->Lambda;
259   }
260   else /* tblastn */
261   {
262     BlastScoreBlkFill( sbp, (CharPtr)query_seq_start, query_length, 0 );
263     sbp->kbp_gap_std[0] = BlastKarlinBlkCreate();
264     BlastKarlinBlkGappedCalcEx( sbp->kbp_gap_std[0], options->gap_open,
265                                 options->gap_extend, options->decline_align,
266 				sbp->name, &error_return );
267     sbp->kbp_gap = sbp->kbp_gap_std;
268     sbp->kbp = sbp->kbp_std;
269     kbp_gap = sbp->kbp_gap[first_context];
270     Lambda = kbp_gap->Lambda;
271   }
272 
273   matrix = sbp->matrix;
274 
275   if( StringCmp( options->program_name, "blastn" ) == 0 )
276     X_score = abs(-matrix[0][sbp->ambiguous_res[0]]);
277   else /* tblastn */
278   {
279     X_score = INT4_MAX;
280 
281     for( i = 1; i < sbp->mat_dim2; ++i )
282       if( matrix[i][sbp->ambiguous_res[0]]
283           && abs( matrix[i][sbp->ambiguous_res[0]] ) < X_score )
284         X_score = abs( matrix[i][sbp->ambiguous_res[0]] );
285   }
286 
287   dropoff_1st_pass_bits = options->dropoff_1st_pass;
288   dropoff_2nd_pass_bits = options->dropoff_2nd_pass;
289   gap_x_dropoff_bits = options->gap_x_dropoff;
290   gap_x_dropoff_final_bits = options->gap_x_dropoff_final;
291 
292   if( !dropoff_1st_pass_bits )
293     dropoff_1st_pass_bits = DROPOFF_NUMBER_OF_BITS;
294 
295   if( !dropoff_2nd_pass_bits )
296     dropoff_2nd_pass_bits = DROPOFF_NUMBER_OF_BITS;
297 
298   gap_x_dropoff = (BLAST_Score)(gap_x_dropoff_bits*NCBIMATH_LN2/Lambda);
299   gap_x_dropoff_final
300     = (BLAST_Score)(gap_x_dropoff_final_bits*NCBIMATH_LN2/Lambda);
301   gap_x_dropoff_final = MAX( gap_x_dropoff_final, gap_x_dropoff );
302   dropoff_1st_pass
303     = (BLAST_Score)ceil( (Nlm_FloatHi)dropoff_1st_pass_bits*NCBIMATH_LN2
304                                      /Lambda );
305   dropoff_2nd_pass
306     = (BLAST_Score)ceil( (Nlm_FloatHi)dropoff_2nd_pass_bits*NCBIMATH_LN2
307                                      /Lambda );
308 
309   max_dropoff = MAX( gap_x_dropoff_final, gap_x_dropoff );
310 
311   if( max_dropoff < dropoff_1st_pass ) max_dropoff = dropoff_1st_pass;
312 
313   if( max_dropoff < dropoff_2nd_pass ) max_dropoff = dropoff_2nd_pass;
314 
315   SeqLocFree( private_slp );
316   BLAST_ScoreBlkDestruct( sbp );
317   SeqLocFree( query_slp );
318   SeqMgrDeleteFromBioseqIndex( fake_bsp );
319   MemFree(query_seq_start);
320   return (Uint4)ceil( ((Nlm_FloatHi)max_dropoff*1.5)/X_score );
321 }
322 
323 /* AM: This functions returns the number of the original query given an
324        offset and frame into the concatenated query. */
GetQueryNum(QueriesPtr mult_queries,Int4 offset,Int4 end,Int2 frame)325 Uint4 GetQueryNum( QueriesPtr mult_queries, Int4 offset, Int4 end, Int2 frame )
326 {
327   Int4 tmp_offset = 0;
328   Int4 tmp_end = 0;
329 
330   if( frame == 1 )
331     if( offset > mult_queries->TotalLength )
332     {
333       tmp_offset = 2*mult_queries->TotalLength + 1 - offset;
334       tmp_end = 2*mult_queries->TotalLength + 1 - end;
335     }
336     else
337     {
338       tmp_offset = offset;
339       tmp_end = end;
340     }
341   else if( frame == -1 )
342   {
343     tmp_offset = mult_queries->TotalLength - offset;
344     tmp_end = mult_queries->TotalLength - end;
345   }
346   else if( frame == 0 )
347   {
348     tmp_offset = offset;
349     tmp_end = end;
350   }
351 
352   while( tmp_offset != tmp_end
353          && (mult_queries->WhichQuery[tmp_offset] <= 0
354 	     || mult_queries->WhichQuery[tmp_offset]
355 	        > mult_queries->NumQueries) )
356     if( tmp_offset > tmp_end )
357       --tmp_offset;
358     else ++tmp_offset;
359 
360   return mult_queries->WhichQuery[tmp_offset] - 1;
361 }
362 
363 /* AM: Comparison routine for SeqAlignPtr used by DivideSeqAligns. */
CompareSeqAlignPtr(Nlm_VoidPtr afirst,Nlm_VoidPtr asecond)364 static int LIBCALL CompareSeqAlignPtr PROTO(( Nlm_VoidPtr afirst, Nlm_VoidPtr asecond ))
365 {
366   Nlm_FloatHi bit_score1, evalue1, bit_score2, evalue2;
367   Int4 number1, score1, number2, score2;
368   PrimaryNodePtr PNTR first_node = (PrimaryNodePtr PNTR) afirst;
369   PrimaryNodePtr PNTR second_node = (PrimaryNodePtr PNTR) asecond;
370   SeqAlignPtr first = (*first_node)->sap;
371   SeqAlignPtr second = (*second_node)->sap;
372   GetScoreAndEvalue( first, &score1, &bit_score1, &evalue1, &number1 );
373   GetScoreAndEvalue( second, &score2, &bit_score2, &evalue2, &number2 );
374 
375   if( (*first_node)->evalue > (*second_node)->evalue ) return -1;
376   else if( (*first_node)->evalue < (*second_node)->evalue ) return 1;
377   else if( score1 > score2 ) return 1;
378   else if( score1 < score2 ) return -1;
379   else if( (*first_node)->subject_id > (*second_node)->subject_id ) return 1;
380   else if( (*first_node)->subject_id < (*second_node)->subject_id ) return -1;
381   else return 0;
382 }
383 
384 /* AM: This function divides a list of alignments for the concatenated query into the
385        array of lists of alignments per each original query.
386 
387        params:
388 
389        options		    --- program options (used to determine which program is being run).
390        sap                  --- points to the head of the list of alignments for concatenated query.
391        mult_queries         --- points to the structure containing information about individual
392 	                        queries.
393        subjects		    --- array of structures containing information of subject ids and
394                                 evalues of seqaligns. This is necessary to ensure correct order
395 				of results.
396 
397        return value         --- array of pointers to the heads of the lists of alignments: one per
398                                 every original query.
399 */
DivideSeqAligns(BLAST_OptionsBlkPtr options,SeqAlignPtr sap,QueriesPtr mult_queries,MQ_DivideResultsInfoPtr subjects)400 SeqAlignPtrArray LIBCALL DivideSeqAligns PROTO(( BLAST_OptionsBlkPtr options,
401                                                  SeqAlignPtr sap,
402                                                  QueriesPtr mult_queries,
403 						 MQ_DivideResultsInfoPtr subjects ))
404 {
405   typedef struct _SecondaryNode
406   {
407     SeqAlignPtr sap;			/* SeqAlign asociated with this node */
408     SeqAlignPtr primary;		/* The primary node asociated with this node */
409     struct _SecondaryNode PNTR next;	/* Next element in the list */
410   } SecondaryNode, PNTR SecondaryNodePtr, PNTR PNTR SecondaryNodePtrArray;
411 
412   SeqAlignPtrArray 	result = NULL;				/* The resulting array of seqalign lists. Returned by the function */
413   SeqAlignPtr 		head = NULL,
414    			next_sap = NULL;
415   SecondaryNodePtrArray secondary = NULL;			/* The array of secondary seqalign lists */
416   SecondaryNodePtr 	secondary_node_pool = NULL;		/* The set of free secondary nodes */
417   SecondaryNodePtr 	next_free_secondary_node = NULL,
418   			new_secondary_node;
419   PrimaryNodePtrArray   primary = NULL;				/* The array of primary seqalign lists */
420   PrimaryNodePtrArray   primaries_per_query;			/* Array for heap sort. */
421   PrimaryNodePtr 	primary_node_pool = NULL;		/* The set of free primary nodes */
422   PrimaryNodePtr 	next_free_primary_node = NULL,
423   			new_primary_node;
424   IntArray 		lengths;				/* Array of sized of secondary node lists */
425   Uint1Array 		new_primary;
426   Uint4 		num_queries = 0, 			/* Total number of original queries */
427  			num_seqaligns = 0, 			/* Number of seqaligns in the input list */
428 			start = 0,
429 			query = 0;
430   Uint4 		count = 0,
431   			count1 = 0,
432 			primary_array_size = 0;			/* The maximum size of a primary node list */
433   SeqIdPtr 		subject_id = NULL, 			/* The id of the "current" subject sequence */
434   			last_id = NULL;				/* Saved id of the previous subject sequence. */
435   SeqLocPtr		loc = NULL;				/* Sequence location if segs is of type StdSeg */
436 
437   num_queries = mult_queries->NumQueries;
438 
439   /* Count the seqaligns in the input list */
440   for( head = sap; head; head = head->next ) ++num_seqaligns;
441 
442   /* Memory allocation */
443   result = (SeqAlignPtrArray)MemNew( sizeof( SeqAlignPtr )*num_queries );
444   primary = (PrimaryNodePtrArray)MemNew( sizeof( PrimaryNodePtr )*num_queries );
445   primary_node_pool = (PrimaryNodePtr)MemNew( sizeof( PrimaryNode )*num_seqaligns );
446   next_free_primary_node = primary_node_pool;
447   secondary = (SecondaryNodePtrArray)MemNew( sizeof( SecondaryNodePtr )*num_queries );
448   secondary_node_pool = (SecondaryNodePtr)MemNew( sizeof( SecondaryNode )*num_seqaligns );
449   next_free_secondary_node = secondary_node_pool;
450   lengths = (IntArray)MemNew( sizeof( Int4 )*num_queries );
451   new_primary = (Uint1Array)MemNew( sizeof( Uint1 )*num_queries );
452 
453   /* Primary and secondary lists creation. */
454   for( count = 0; sap; sap = next_sap )
455   {
456     if( StringCmp( options->program_name, "blastn" ) == 0 )
457       start = *((DenseSegPtr)(sap->segs))->starts;
458     else
459     {
460       loc = ((StdSegPtr)(sap->segs))->loc;
461       start = SeqLocStart( loc );
462     }
463 
464     while( mult_queries->WhichQuery[start] <= 0
465            || mult_queries->WhichQuery[start] > num_queries )
466       ++start;
467 
468     query = mult_queries->WhichQuery[start] - 1;
469     next_sap = sap->next;
470     subject_id = TxGetSubjectIdFromSeqAlign( sap );
471 
472     if( !last_id || SeqIdComp( subject_id, last_id ) != SIC_YES )
473       MemFill( (void *)new_primary, 0, sizeof( Uint1 )*num_queries );
474 
475     last_id = subject_id;
476 
477     if( !new_primary[query] )
478     {
479       ++lengths[query];
480       new_primary[query] = 1;
481       new_primary_node = next_free_primary_node++;
482       new_primary_node->sap = sap;
483       new_primary_node->evalue = subjects[count].evalue;
484       new_primary_node->subject_id = subjects[count++].subject_id;
485       new_primary_node->next = primary[query];
486       primary[query] = new_primary_node;
487     }
488     else
489     {
490       new_secondary_node = next_free_secondary_node++;
491       new_secondary_node->sap = sap;
492       new_secondary_node->primary = primary[query]->sap;
493       new_secondary_node->next = secondary[query];
494       secondary[query] = new_secondary_node;
495     }
496   }
497 
498   /* Allocate primary array for sorting. */
499   for( count = 0; count < num_queries; ++count )
500     if( lengths[count] > primary_array_size )
501       primary_array_size = lengths[count];
502 
503   /* primary = (SortPairNodePtr)MemNew( sizeof( SortPairNode )*primary_array_size ); */
504   primaries_per_query = (PrimaryNodePtrArray)MemNew(
505     sizeof( PrimaryNodePtr )*primary_array_size );
506 
507   /* Sort the primary arrays, then merge primary and secondary nodes
508      for each query.*/
509   for( count = 0; count < num_queries; ++count )
510   {
511     for( count1 = 0, new_primary_node = primary[count]; new_primary_node;
512          new_primary_node = new_primary_node->next, ++count1 )
513       primaries_per_query[count1] = new_primary_node;
514 
515     result[count] = NULL;
516     Nlm_HeapSort( (Nlm_VoidPtr)primaries_per_query, count1,
517                   sizeof( PrimaryNodePtr ), CompareSeqAlignPtr );
518 
519     for( count1 = 0; count1 < lengths[count]; ++count1 )
520     {
521       primaries_per_query[count1]->sap->next = result[count];
522       result[count] = primaries_per_query[count1]->sap;
523     }
524 
525     for( new_secondary_node = secondary[count]; new_secondary_node;
526          new_secondary_node = new_secondary_node->next )
527     {
528       new_secondary_node->sap->next = new_secondary_node->primary->next;
529       new_secondary_node->primary->next = new_secondary_node->sap;
530     }
531 
532     MemFill( (void *)primaries_per_query, 0, sizeof( SeqAlignPtr )*primary_array_size );
533   }
534 
535   /* Cleanup */
536   MemFree( (void *)primary );
537   MemFree( (void *)primary_node_pool );
538   MemFree( (void *)primaries_per_query );
539   MemFree( (void *)new_primary );
540   MemFree( (void *)lengths );
541   MemFree( (void *)secondary_node_pool );
542   MemFree( (void *)secondary );
543 
544   /* Now correct offsets in result. */
545   for( count = 0; count < num_queries; ++count )
546     if( result[count] )
547     {
548       sap = result[count];
549 
550       while( sap )
551       {
552 	SeqIdPtr tmpid = NULL;
553 
554         if( StringCmp( options->program_name, "blastn" ) == 0 )
555         {
556 	  Uint4 i = 0;
557 	  DenseSegPtr dsp = (DenseSegPtr)(sap->segs);
558 
559 	  for( ; i < (dsp->dim)*(dsp->numseg); i += dsp->dim )
560 	  {
561             start = dsp->starts[i];
562 
563 	    if( start != -1 )
564 	      start = start - mult_queries->QueryStarts[count];
565 
566             dsp->starts[i] = start;
567 	  }
568 
569 	  tmpid = SeqIdDup( mult_queries->FakeBsps[count]->id );
570 	  tmpid->next = dsp->ids->next;
571           SeqIdFree( dsp->ids );
572 	  dsp->ids = tmpid;
573         }
574         else
575         {
576 	  Uint4 end = 0, len;
577 	  SeqLocPtr loc, newloc = NULL;
578 	  StdSegPtr ssp = (StdSegPtr)(sap->segs);
579 	  SeqIdPtr newid = NULL;
580 	  tmpid = mult_queries->FakeBsps[count]->id;
581 
582 	  while( ssp )
583 	  {
584 	    loc = ssp->loc;
585 	    start = SeqLocStart( loc );
586 
587 	    if( start != -1 )
588 	    {
589 	      end = SeqLocStop( loc );
590 	      len = end - start;
591 	      start = start - mult_queries->QueryStarts[count];
592 	      end = start + len;
593 	      newloc = SeqLocIntNew( start, end, SeqLocStrand( loc ), tmpid );
594 	    }
595 	    else
596 	    {
597 	      newloc = ValNodeNew( NULL );
598 	      newloc->choice = SEQLOC_EMPTY;
599 	      newloc->data.ptrvalue = SeqIdDup( tmpid );
600 	    }
601 
602 	    newid = SeqIdDup( tmpid );
603 	    newid->next = ssp->ids->next;
604             SeqIdFree( ssp->ids );
605 	    ssp->ids = newid;
606 	    newloc->next = loc->next;
607 	    ssp->loc = newloc;
608 	    SeqLocFree( loc );
609 	    ssp = ssp->next;
610 	  }
611         }
612 
613 	sap = sap->next;
614       }
615     }
616 
617   return result;
618 }
619 
620 /* BlastMakeFakeBspConcat
621    Given an array of fake_bsps, this function creates a new fake_bsp. The data
622    field of the new bsp is constructed using the concatenated sequences of the
623    bsps in the array, in the same order as they appear in the array.
624    Protein sequences are concatenated directly.
625    Nucleotide sequences are encoded with two per byte, so they are concatenated
626    byte-wise.
627    Parameters: bsp_arr, the array of fake_bsps from which sequences are to be pulled
628                num_bsps, the maximum number of bsps in the array to use
629                is_na: true if query is nucleotide, false if amino acid seq
630                num_spacers: the number of spacer characters between the original
631                             sequences in the concatenated query. (AM:)
632    note: assumes that the array bsp_arr has at least num_bsp elements!
633 
634    AM: Changes to align on the byte boundaries.
635 */
636 BioseqPtr LIBCALL
BlastMakeFakeBspConcat(BspArray bsp_arr,Uint4 num_bsps,Boolean is_na,Uint4 num_spacers)637 BlastMakeFakeBspConcat PROTO((BspArray bsp_arr, Uint4 num_bsps, Boolean is_na, Uint4 num_spacers)){
638 	BioseqPtr tot;
639 	Int4 bsp_iter, letter_iter;
640 		/* "compact" len refers to #bytes to store the seq, not to #of letters */
641 	Int4 tot_len, tot_compact_len, spacer_compact_len;
642 	Int4 indiv_len;		/*keep an Int4 to satisfy BlastGetSequenceFromBioseq */
643 	Uint4 total_written;
644 	BioseqPtr curr_bsp;
645 	Uint1Ptr indiv_seq = NULL, concat_seq;
646 	ByteStorePtr byte_store;
647         Uint1 PNTR spacer_seq_nuc, PNTR spacer_seq_prot;
648 	SeqLocPtr slp = NULL;
649 	SeqPortPtr spp = NULL;
650 	Uint1 residue, shift = 0;
651 	Int4 index = 0;
652 	Uint1 newcode = Seq_code_ncbi4na, oldcode;
653         ByteStorePtr from = NULL,
654 	             to = NULL;
655 
656         /* AM: Determine the size and allocate spacer arrays. */
657         if( is_na )
658 	{
659 	  num_spacers = (num_spacers + 1)/2;
660 
661           /* Recode all BSPs to NCBI4na */
662 	  for( bsp_iter = 0; bsp_iter < num_bsps; ++bsp_iter )
663 	  {
664 	    oldcode = bsp_arr[bsp_iter]->seq_data_type;
665 	    if (oldcode == Seq_code_gap) continue;
666 	    from = (ByteStorePtr) bsp_arr[bsp_iter]->seq_data;
667 
668 	    if( !(to = BSConvertSeq( from, newcode, oldcode,
669 	                             bsp_arr[bsp_iter]->length )) )
670               return NULL;
671 
672             bsp_arr[bsp_iter]->seq_data_type = newcode;
673             bsp_arr[bsp_iter]->seq_data = (SeqDataPtr) to;
674 	     }
675         }
676 
677         spacer_seq_nuc = (Uint1 PNTR)MemNew( sizeof( Uint1 )*(num_spacers + 1) );
678 	spacer_seq_prot = spacer_seq_nuc;
679 
680 	/* create the spacer sequences to insert between sequences */
681 	spacer_compact_len = num_spacers;
682 
683 	for( letter_iter = 0; letter_iter < spacer_compact_len; ++letter_iter )
684 	  if( is_na ) spacer_seq_nuc[letter_iter] = TWO_N;
685 	  else spacer_seq_prot[letter_iter] = 'X';
686 
687         spacer_seq_nuc[letter_iter] = 0;
688 	tot = BioseqNew();
689 
690 	if (tot == NULL)
691 		return NULL;
692 
693 	/* initialize the fake_bsp with fields of the first bsp in the array */
694 	tot = (BioseqPtr) BlastMakeFakeBioseq( (BioseqPtr) *bsp_arr, NULL);
695 
696 	/* find total length of letters */
697 	tot_len = 0;
698 
699 	for( bsp_iter=0; bsp_iter<num_bsps; bsp_iter++ )
700 	{
701   	  curr_bsp = *(bsp_arr+bsp_iter);
702 	  tot_len += curr_bsp->length;
703 
704 	  if( is_na && bsp_iter != num_bsps - 1 && curr_bsp->length%2 ) ++tot_len;
705 	}
706 
707 	/* compact length indicates actual number of bytes in representation */
708 	tot_compact_len = (is_na) ? (tot_len + 1)/2 : tot_len;
709 	tot_compact_len += spacer_compact_len * (num_bsps-1);
710 
711 	/* total len is in terms of number of letters */
712 	tot_len += ( (is_na) ? num_spacers*2 : num_spacers) * (num_bsps-1);
713 
714 	concat_seq = MemNew((1 + tot_compact_len) * sizeof(Uint1));
715 	concat_seq[0] = 0;
716 
717 	if (concat_seq == NULL)
718 		return NULL;
719 
720         total_written = 0;
721 
722         for( bsp_iter = 0; bsp_iter < num_bsps; ++bsp_iter )
723 	{
724 	  curr_bsp = bsp_arr[bsp_iter];
725 	  indiv_len = curr_bsp->length;
726 
727 	  if( is_na )
728 	  {
729 	    /* indiv_seq = (Uint1Ptr)(bsp_arr[bsp_iter]->seq_data->chain->str); */
730 	    indiv_len = (indiv_len + 1)/2;
731 	    indiv_seq = MemNew( (1 + indiv_len)*sizeof( Uint1 ) );
732 
733 	    if( !indiv_seq ) return NULL;
734 
735             slp = NULL;
736 	    index = 0;
737 	    shift = 0;
738 	    spp = SeqPortNew( bsp_arr[bsp_iter], 0, -1, Seq_strand_plus, Seq_code_ncbi4na );
739 
740 	    while( (residue = SeqPortGetResidue( spp )) != SEQPORT_EOF )
741 	    {
742 	      if( shift )
743 	      {
744 	        indiv_seq[index/2] = ((indiv_seq[index/2])&0xF0);
745 	        residue = (residue&0xF);
746 		indiv_seq[index/2] |= residue;
747 	      }
748 	      else
749 	      {
750 	        indiv_seq[index/2] = 0;
751 	        residue = (residue << 4);
752 		indiv_seq[index/2] = residue;
753 	      }
754 
755 	      shift = 1 - shift;
756 	      ++index;
757 	    }
758 
759             indiv_seq[indiv_len] = 0;
760 	    spp = SeqPortFree( spp );
761 
762 	    if( bsp_iter )
763 	    {
764 	      StrCat( (CharPtr) concat_seq, (CharPtr) spacer_seq_nuc );
765 	      total_written += num_spacers;
766             }
767 
768 	    StrCat( (CharPtr) concat_seq, (CharPtr) indiv_seq );
769 	    total_written += indiv_len;
770 
771 	    if( curr_bsp->length%2 )
772 	    {
773 	      concat_seq[total_written - 1] = concat_seq[total_written - 1]&0xF0;
774 	      concat_seq[total_written - 1] = concat_seq[total_written - 1]|LAST_N;
775 	    }
776 
777 	    indiv_seq = MemFree( indiv_seq );
778 	  }
779 	  else
780 	  {
781  	    indiv_seq = (Uint1Ptr)BlastGetSequenceFromBioseq( bsp_arr[bsp_iter], &indiv_len );
782 
783 	    if( bsp_iter ) StrCat( (CharPtr) concat_seq, (CharPtr) spacer_seq_prot );
784 
785 	    StrCat( (CharPtr) concat_seq, (CharPtr) indiv_seq );
786             indiv_seq = MemFree(indiv_seq);
787 	  }
788 	}
789 
790 	byte_store = BSNew(0);
791 	BSWrite(byte_store, (VoidPtr) concat_seq, tot_compact_len);
792 
793 	tot->seq_data = (SeqDataPtr) byte_store;
794 	tot->length = tot_len;
795 
796         MemFree(concat_seq);
797         MemFree( (void *)spacer_seq_nuc );
798 	return (tot);
799 }
800 
801 /* Duplicates Queries structure for use with multiple threads. */
BlastDuplicateMultQueries(QueriesPtr source)802 QueriesPtr LIBCALL BlastDuplicateMultQueries PROTO(( QueriesPtr source ))
803 {
804   QueriesPtr result = NULL;
805   result = (QueriesPtr) MemNew (sizeof (Queries) );
806 
807   if( !result ) return NULL;
808 
809   result->NumQueries = source->NumQueries;
810   result->TotalLength = source->TotalLength;
811   result->FakeBsps = source->FakeBsps;
812   result->LCaseMasks = source->LCaseMasks;
813   result->QueryStarts = source->QueryStarts;
814   result->QueryEnds = source->QueryEnds;
815   result->WhichQuery = source->WhichQuery;
816   result->WhichPos = source->WhichPos;
817   result->EffLengths = source->EffLengths;
818   result->Adjustments = source->Adjustments;
819   result->SearchSpEff = source->SearchSpEff;
820   result->DbLenEff = source->DbLenEff;
821   result->MinLen = source->MinLen;
822   result->MinLenEff = source->MinLenEff;
823   result->MinDbLenEff = source->MinDbLenEff;
824   result->MinSearchSpEff = source->MinSearchSpEff;
825   result->LambdaMin = source->LambdaMin;
826   result->LambdaMax = source->LambdaMax;
827   result->LogKMin = source->LogKMin;
828   result->LogKMax = source->LogKMax;
829   result->HitListArray = NULL;
830   result->result_info = source->result_info;
831   result->max_results_per_query = source->max_results_per_query;
832   result->dropoff_2nd_pass_array = source->dropoff_2nd_pass_array;
833   result->lambda_array = source->lambda_array;
834   result->sap_array_data = source->sap_array_data;
835   return result;
836 }
837 
BlastMultQueriesDestruct(QueriesPtr queries)838 QueriesPtr LIBCALL BlastMultQueriesDestruct PROTO(( QueriesPtr queries ))
839 {
840     Int4 i;
841 
842     if( queries ) {
843         for( i = 0; i < queries->NumQueries; ++i ) {
844             MemFree( queries->result_info[i].results );
845 
846             if( queries->HitListArray )
847                 BlastHitListDestruct( queries->HitListArray[i] );
848         }
849 
850         if( queries->sap_array_data )
851             MemFree( queries->sap_array_data->sap_array );
852 
853         MemFree( queries->WhichPos );
854         MemFree( queries->WhichQuery );
855         MemFree( queries->sap_array_data );
856         MemFree( queries->dropoff_2nd_pass_array );
857         MemFree( queries->lambda_array );
858         MemFree( queries->result_info );
859         MemFree( queries->EffLengths );
860         MemFree( queries->Adjustments );
861         MemFree( queries->DbLenEff );
862         MemFree( queries->SearchSpEff );
863         MemFree( queries->QueryEnds );
864         MemFree( queries->QueryStarts );
865         MemFree( queries->FakeBsps );
866         MemFree( queries->HitListArray );
867     }
868 
869     return MemFree( queries );
870 }
871 
872 /* BlastMakeMultQueries
873    Given an array of fake_bsps corresponding to individual BLAST queries, returns
874    a pointer to a Queries structure containing information about the individual
875    queries.
876    Parameters: bsp_arr, the array of fake_bsps
877                num_queries, the maximum number of fake_bsps from the array to process
878    note: assumes that bsp_arr has minimum num_queries elements
879          Arrays created giving information about query number and position within
880          individual query for each location on the concatenated sequence are
881          based on numbering beginning with 1, not 0 for queries and positions
882 */
883 QueriesPtr LIBCALL
BlastMakeMultQueries(BspArray bsp_arr,Uint4 num_queries,Boolean is_na,Uint4 num_spacers,SeqLocPtr PNTR lcase_mask_arr)884 BlastMakeMultQueries PROTO((BspArray bsp_arr, Uint4 num_queries, Boolean is_na, Uint4 num_spacers, SeqLocPtr PNTR lcase_mask_arr))
885 {
886 	QueriesPtr queries;
887 	Int4 bsp_iter, pos_iter;
888 	BioseqPtr curr_bsp;
889 	Int4 curr_index;
890 	Int4 curr_length;
891         IntArray starts, ends;
892 	Int4 query, pos, query_holder;
893 	Int4 *which_query;
894 	Int4 *which_pos;
895 
896 	queries = (QueriesPtr) MemNew (sizeof (Queries) );
897 
898 	queries->NumQueries = num_queries;
899 	queries->FakeBsps = bsp_arr;
900 	queries->LCaseMasks = lcase_mask_arr;
901 
902 	/* AM: These has to be dynamically allocated (bug fix) */
903 	starts = (IntArray)MemNew( sizeof( Int4 )*num_queries );
904 	ends   = (IntArray)MemNew( sizeof( Int4 )*num_queries );
905 
906         /* AM: Inserted allocation of effective lengths, adjustment, search
907 	   space size, and dblen arrays. */
908 	queries->SearchSpEff = (FloatArray)MemNew( sizeof( Nlm_FloatHi )*num_queries );
909 	queries->DbLenEff = (Int8Array)MemNew( sizeof( Int8 )*num_queries );
910 	queries->Adjustments = (IntArray)MemNew( sizeof( Int4 )*num_queries );
911 	queries->EffLengths = (IntArray)MemNew( sizeof( Int4 )*num_queries );
912 	queries->result_info = (MQ_ResultInfoPtr)MemNew( sizeof( MQ_ResultInfo )*num_queries );
913 	queries->lambda_array = (Nlm_FloatHi PNTR)MemNew( sizeof( Nlm_FloatHi )*num_queries );
914 	queries->dropoff_2nd_pass_array
915 	  = (BLAST_Score PNTR)MemNew( sizeof( BLAST_Score )*num_queries );
916         queries->sap_array_data = (SapArrayDataPtr)MemNew( sizeof( SapArrayData ) );
917 
918 	queries->MinLen      = INT4_MAX;
919 	queries->MinLenEff   = INT4_MAX;
920 	queries->MinDbLenEff = INT8_MAX;
921 
922         queries->HitListArray = NULL;
923 	queries->use_mq = FALSE;
924 
925 	/* determine and record query starts and ends in the concatenated query */
926 	curr_index = 0;
927 	for (bsp_iter=0; bsp_iter<num_queries; bsp_iter++) {
928 		curr_bsp = *(bsp_arr + bsp_iter);
929 		curr_length = curr_bsp->length;
930 		starts[bsp_iter] = curr_index;
931 		curr_index += curr_length;
932 		ends[bsp_iter] = curr_index - 1;
933 		curr_index += num_spacers;
934 
935 		if( is_na && bsp_iter != num_queries - 1 && curr_length%2 ) ++curr_index;
936 	}
937 	curr_index -= num_spacers;
938 	/* now curr_index = total number of letters in concat. query */
939 	queries->QueryStarts = starts;
940 	queries->QueryEnds = ends;
941 
942 	/* create arrays showing querynum/pos in indiv. query for each letter in the
943            concatenated query sequence
944 	   assumes well-formed starts/ends arrays */
945 	which_query = MemNew (sizeof(Int4) * curr_index);
946 	which_pos = MemNew (sizeof(Int4) * curr_index);
947 	query = query_holder = 1;
948 	pos = 1;
949 	for (pos_iter=0; pos_iter<curr_index; pos_iter++) {
950 		/* zero-th letter in seq will be query 1, pos 1 */
951 		*(which_query + pos_iter) = query;
952 		*(which_pos + pos_iter) = pos;
953 
954 	  	if (ends[query_holder-1] == pos_iter) {
955 			query_holder++;
956 			query = SPACER_POS;
957 			pos = 1;
958 		} else if (starts[query_holder-1] == pos_iter) {
959 			query = query_holder;
960 			pos = 1;
961 		} else {
962 			pos++;
963 		}
964 	}
965 	queries->WhichQuery = which_query;
966 	queries->WhichPos = which_pos;
967 	return queries;
968 }
969 
970 /* ConcatSeqLoc():
971 
972    Takes a SeqLoc of type SEQLOCK_PACKED_INT that has offsets local to
973    individual queries and returns a new SeqLoc with offsets relative to
974    the concatenated query.
975 
976 */
ConcatSeqLoc(QueriesPtr mult_queries,SeqLocPtr loc,SeqIdPtr id,Uint4 qnum)977 SeqLocPtr LIBCALL ConcatSeqLoc PROTO(( QueriesPtr mult_queries, SeqLocPtr loc, SeqIdPtr id, Uint4 qnum ))
978 {
979   SeqLocPtr result = NULL;
980   SeqLocPtr current = NULL, newloc = NULL, last = NULL;
981   Uint4 start, end;
982 
983   if( mult_queries && loc->choice == SEQLOC_PACKED_INT )
984   {
985     result = ValNodeNew( NULL );
986     result->choice = loc->choice;
987     current = (SeqLocPtr)loc->data.ptrvalue;
988 
989     while( current )
990     {
991       start = SeqLocStart( current );
992       end = SeqLocStop( current );
993       start = mult_queries->QueryStarts[qnum] + start;
994       end = mult_queries->QueryStarts[qnum] + end;
995       newloc = SeqLocIntNew( start, end, SeqLocStrand( loc ), id );
996 
997       if( last )
998         last->next = newloc;
999       else
1000         result->data.ptrvalue = (void *)newloc;
1001 
1002       last = newloc;
1003       current = current->next;
1004     }
1005   }
1006 
1007   return result;
1008 }
1009 
1010 /* InitHitLists():
1011 
1012    This function breaks search->current_hitlist into multiple lists for
1013    each query and fills in the structures in search->mult_queries.
1014 
1015 */
InitHitLists(BlastSearchBlkPtr search)1016 void LIBCALL InitHitLists PROTO(( BlastSearchBlkPtr search ))
1017 {
1018   QueriesPtr queries;
1019   BLAST_HitListPtr * hitlist_array;
1020   BLAST_HitListPtr current_hitlist;
1021   BLAST_HSPPtr hsp;
1022   Uint4 i, query_num;
1023 
1024   if( !search ) return;
1025 
1026   queries = search->mult_queries;
1027   current_hitlist = search->current_hitlist;
1028 
1029   if( !queries || !current_hitlist ) return;
1030 
1031   hitlist_array = queries->HitListArray;
1032 
1033   if( !hitlist_array )
1034   {
1035     hitlist_array = (BLAST_HitListPtr *)MemNew(
1036       sizeof( BLAST_HitListPtr )*queries->NumQueries );
1037     queries->HitListArray = hitlist_array;
1038 
1039     for( i = 0; i < queries->NumQueries; ++i )
1040       hitlist_array[i] = BlastHitListNew( search );
1041   }
1042 
1043   for( i = 0; i < queries->NumQueries; ++i )
1044   {
1045     BlastHitListPurge( hitlist_array[i] );
1046     hitlist_array[i]->next = NULL;
1047     hitlist_array[i]->hspmax = current_hitlist->hspmax;
1048     hitlist_array[i]->hspcnt = 0;
1049     hitlist_array[i]->hspcnt_max = current_hitlist->hspcnt_max;
1050     hitlist_array[i]->further_process = current_hitlist->further_process;
1051     hitlist_array[i]->do_not_reallocate = current_hitlist->do_not_reallocate;
1052     MemFree( hitlist_array[i]->lh_helper );
1053     hitlist_array[i]->lh_helper = NULL;
1054     hitlist_array[i]->lh_helper_size = current_hitlist->lh_helper_size;
1055     MemFree( hitlist_array[i]->hsp_array );
1056     hitlist_array[i]->hsp_array = (BLAST_HSPPtr PNTR)MemNew(
1057       sizeof( BLAST_HSPPtr )*hitlist_array[i]->hspmax );
1058   }
1059 
1060   for( i = 0; i < current_hitlist->hspcnt; ++i )
1061   {
1062     hsp = current_hitlist->hsp_array[i];
1063 
1064     if( hsp )
1065     {
1066       query_num = GetQueryNum( queries, hsp->query.offset,
1067                                hsp->query.end, hsp->query.frame );
1068       hitlist_array[query_num]->hsp_array[hitlist_array[query_num]->hspcnt] = hsp;
1069       current_hitlist->hsp_array[i] = NULL;
1070       ++hitlist_array[query_num]->hspcnt;
1071     }
1072   }
1073 }
1074 /* TEMPORARY FUNCTION - FOR DEBUGGING ONLY */
MQ_CheckLists(BLASTResultHitlistPtr PNTR local,Int4 lsize,BLASTResultHitlistPtr PNTR res,Int4 rsize)1075 void LIBCALL MQ_CheckLists PROTO(( BLASTResultHitlistPtr PNTR local, Int4 lsize,
1076                                    BLASTResultHitlistPtr PNTR res, Int4 rsize ))
1077 {
1078   Int4 i, j;
1079 
1080   for( i = 0; i < lsize; ++i )
1081   {
1082     int found = 0;
1083 
1084     for( j = 0; j < rsize; ++j )
1085       if( local[i] == res[j] )
1086       {
1087         found = 1;
1088         break;
1089       }
1090 
1091     if( !found )
1092     {
1093       fprintf( stderr, "ERROR: %d is not in res\n", i );
1094       return;
1095     }
1096   }
1097 }
1098 
1099 /* ResultIndex1():
1100 
1101    This function first calls the binary search via ResultIndex(). Then tries
1102    to locate the exact object in the array by pointer.
1103 
1104 */
ResultIndex1(BLASTResultHitlistPtr ptr,BLASTResultHitlistPtr PNTR results,Int4 num_elements)1105 Int4 LIBCALL ResultIndex1 PROTO(( BLASTResultHitlistPtr ptr,
1106                                  BLASTResultHitlistPtr PNTR results,
1107 				 Int4 num_elements ))
1108 {
1109   Int4 del_index = ResultIndex( ptr->best_evalue, ptr->high_score,
1110                                 ptr->subject_id, results, num_elements );
1111   ASSERT( del_index < num_elements );
1112 
1113   if( results[del_index] == ptr ) return del_index;
1114 
1115   while( del_index >= 0 && results[del_index]->subject_id == ptr->subject_id )
1116     --del_index;
1117 
1118   while( ++del_index < num_elements && results[del_index] != ptr );
1119 
1120   ASSERT( del_index < num_elements );
1121 
1122   return del_index;
1123 }
1124 
1125 /* ResultIndex():
1126 
1127    This function performs a binary search in results for an element with
1128    goven evalue, score, and subject id. The number of elements in results
1129    is given by num_elements.
1130 
1131 */
ResultIndex(Nlm_FloatHi target_e,Int4 target_score,Int4 subject_id,BLASTResultHitlistPtr PNTR results,Int4 num_elements)1132 Int4 LIBCALL ResultIndex PROTO(( Nlm_FloatHi target_e, Int4 target_score, Int4 subject_id,
1133                   BLASTResultHitlistPtr PNTR results,
1134 		  Int4 num_elements ))
1135 {
1136   Int4 high_index, low_index, new_index, old_index;
1137 
1138   if( !num_elements ) return 0;
1139 
1140   high_index = 0;
1141   low_index = num_elements - 1;
1142   new_index = (high_index + low_index)/2;
1143   old_index = new_index;
1144 
1145   while( 1 )
1146   {
1147     if( results[new_index]->best_evalue > target_e )
1148       low_index = new_index;
1149     else if( results[new_index]->best_evalue < target_e )
1150       high_index = new_index;
1151     else if( results[new_index]->high_score < target_score )
1152       low_index = new_index;
1153     else if( results[new_index]->high_score > target_score )
1154       high_index = new_index;
1155     else if( results[new_index]->subject_id < subject_id )
1156       low_index = new_index;
1157     else high_index = new_index;
1158 
1159     new_index = (high_index + low_index)/2;
1160 
1161     if( old_index == new_index )
1162     {
1163       while( new_index < num_elements )
1164         if( results[new_index]->best_evalue < target_e ) ++new_index;
1165         else if( results[new_index]->best_evalue == target_e
1166                  && results[new_index]->high_score > target_score )
1167           ++new_index;
1168         else if( results[new_index]->best_evalue == target_e
1169 	         && results[new_index]->high_score == target_score
1170 		 && results[new_index]->subject_id > subject_id )
1171           ++new_index;
1172         else break;
1173 
1174       break;
1175     }
1176 
1177     old_index = new_index;
1178   }
1179 
1180   return new_index;
1181 }
1182 
1183 /* MQ_UpdateResultLists():
1184 
1185    Updates the individual hitlists in mult_queries removing
1186    unreferenced elements.
1187 
1188 */
MQ_UpdateResultLists(QueriesPtr mult_queries)1189 void LIBCALL MQ_UpdateResultLists PROTO(( QueriesPtr mult_queries ))
1190 {
1191   Uint4 qnum, index, new_index = 0;
1192   MQ_ResultInfoPtr result_info;
1193 
1194   for( qnum = 0; qnum < mult_queries->NumQueries; ++qnum )
1195   {
1196     result_info = mult_queries->result_info + qnum;
1197 
1198     for( index = 0; index < result_info->NumResults; ++index )
1199       if( result_info->results[index]->num_ref > 0 )
1200         result_info->results[new_index++] = result_info->results[index];
1201 
1202     result_info->NumResults = new_index;
1203   }
1204 }
1205 
1206