1 
2 static char const rcsid[] = "$Id: blast.c,v 6.451 2008/01/25 21:15:22 bealer Exp $"; /* revision-control Id keyword string for this file (file name, revision, date, committer); not referenced in the visible part of the file */
3 
4 /* $Id: blast.c,v 6.451 2008/01/25 21:15:22 bealer Exp $
5 * ===========================================================================
6 *
7 *                            PUBLIC DOMAIN NOTICE
8 *               National Center for Biotechnology Information
9 *
10 *  This software/database is a "United States Government Work" under the
11 *  terms of the United States Copyright Act.  It was written as part of
12 *  the author's official duties as a United States Government employee and
13 *  thus cannot be copyrighted.  This software/database is freely available
14 *  to the public for use. The National Library of Medicine and the U.S.
15 *  Government have not placed any restriction on its use or reproduction.
16 *
17 *  Although all reasonable efforts have been taken to ensure the accuracy
18 *  and reliability of the software and data, the NLM and the U.S.
19 *  Government do not and cannot warrant the performance or results that
20 *  may be obtained by using this software or data. The NLM and the U.S.
21 *  Government disclaim all warranties, express or implied, including
22 *  warranties of performance, merchantability or fitness for any particular
23 *  purpose.
24 *
25 *  Please cite the author in any work or product based on this material.
26 *
27 * ===========================================================================*/
28 
29 /*****************************************************************************
30 
31 File name: blast.c
32 
33 Author: Tom Madden
34 
35 Contents: BLAST functions
36 
37 Detailed Contents:
38 
39 	- Functions that allocate and deallocate structures used by BLAST.
40 
41 	- Functions that find the initial word hits for BLAST (both contiguous
42 	and discontiguous).
43 
44 	- Functions that extend these initial word hits and decide if the
45 	resulting HSPs (High-Scoring Segment Pairs) are worth keeping.
46 
47 	- Functions that link together HSPs into a "hitlist".
48 
49 	- Functions that save the hitlist to a structure appropriate for
50 	further manipulation.
51 
52 ******************************************************************************
53  * $Revision: 6.451 $
54  *
55  * $Log: blast.c,v $
56  * Revision 6.451  2008/01/25 21:15:22  bealer
57  * - Fix synchronization issue with blastpgp -a4 -j4 when composition based
58  *   statistics is used for databases with multiple volumes.
59  *
60  * Revision 6.450  2007/05/07 13:30:54  kans
61  * added casts for Seq-data.gap (SeqDataPtr, SeqGapPtr, ByteStorePtr)
62  *
63  * Revision 6.449  2007/03/13 20:38:39  madden
64  *   - In BLASTCalculateSearchSpace, use floating point multiplication to
65  *     compute the floating point value searchsp.
66  *
67  *   - In BLASTSetUpSearchInternalByLoc, don't cast
68  *     DROPOFF_NUMBER_OF_BITS to an integer when assigning the floating
69  *     point options dropoff_1st_pass and dropoff_2nd_pass.
70  *
71  *   - In BLASTSetUpSearchInternalByLoc, use floating point division to
72  *     compute the floating point value avglen.
73  *
74  *   - In blast_set_parameters, change the type of the function arguments
75  *     dropoff_number_of_bits_1st_pass and
76  *     dropoff_number_of_bits_2nd_pass to Nlm_FloatHi.
77  *
78  *   - In blast_set_parameters, cast a value in the computation of
79  *     cutoff_s_first to type BLAST_Score only after dividing by Lambda,
80  *     instead of before performing the division.
81  *   [from Mike Gertz]
82  *
83  * Revision 6.448  2007/03/05 14:51:22  camacho
84  * - In BLASTPerformFinalSearch, merge the hitlists for PSITBLASTN, as is
85  *   done for TBLASTN.
86  * - In xsum_compare_hsps, break ties by calling score_compare_hsps.
87  *
88  * Revision 6.447  2006/09/21 13:42:36  madden
89  * BlastProcessGiLists returns a boolean to specify that an attempt was made to process a list of GIs.  If no matches were found this can be reported back to the user
90  *
91  * Revision 6.446  2006/06/01 15:48:38  papadopo
92  * in blastMergeFilterLocs, add the capability to merge mixed-type seqlocs; these appear in e.g. megablast with both low-complexity and repeat filtering
93  *
94  * Revision 6.445  2005/10/06 12:52:23  madden
95  * Changes to support correct gapped stats for blastn
96  *
97  * Revision 6.444  2005/09/29 17:40:08  coulouri
98  * from mike gertz:
99  *     In the do_gapped_blast_search routine, in the case where query
100  *     concatenation is used, call BlastLinkHsps only when
101  *     search->pbp->do_sum_stats is true.
102  *
103  * Revision 6.443  2005/09/26 15:02:58  morgulis
104  * Fixing some memory leaks when using query concatenation in blastn and tblastn.
105  *
106  * Revision 6.442  2005/08/31 20:32:31  coulouri
107  * From Mike Gertz:
108  *    - Added the function BlastSingleQueryResultSize to implement the
109  *      policy for adjusting the hitlist size for preliminary alignments
110  *      to a single query.
111  *    - In BLASTSetUpSearchWithReadDbInternalMult replaced existing code
112  *      for adjusting the hitlist size for a single query by a call to
113  *      BlastSingleQueryResultSize.
114  *    - In BLASTSetUpSearchEx, replaced existing code for adjusting the
115  *      hitlist size by a call to BlastSingleQueryResultSize.  This
116  *      changes the behavior of the routine slightly, in that the hitlist
117  *      size is (correctly) no longer increased for ungapped alignments.
118  *
119  * Revision 6.441  2005/07/28 14:57:09  coulouri
120  * remove dead code
121  *
122  * Revision 6.440  2005/07/27 15:51:54  coulouri
123  * remove unused queue_callback
124  *
125  * Revision 6.439  2005/05/19 11:11:59  coulouri
126  * Changes from morgulis to address rt ticket 15091715:
127  * null hsp_array in blastall tblastn query concatenation causes segfault
128  *
129  * Revision 6.438  2005/05/10 18:51:15  dondosha
130  * Removed unused functions and variables; moved sorting of HSPs by score after new_link_hsps inside this function
131  *
132  * Revision 6.437  2005/05/10 16:15:23  dondosha
133  * Back-porting changes in uneven gap HSP linking from algo/blast code: from Mike Gertz
134  *
135  * Revision 6.436  2005/05/06 11:49:22  coulouri
136  * remove unnecessary evalue check that results in instability of number_of_seqs_better_E; addresses rt ticket 15075332
137  *
138  * Revision 6.435  2005/05/02 16:03:14  coulouri
139  * refactor code to set db_chunk_size
140  *
141  * Revision 6.434  2005/04/25 14:16:36  coulouri
142  * set db_chunk_size adaptively
143  *
144  * Revision 6.433  2005/01/24 21:17:36  camacho
145  * 1. Changed implementation of RPSBlastResultHspScoreCmp to have the same
146  *    tie-breakers as score_compare_hsps
147  * 2. Renamed RPSBlastResultHspScoreCmp to BLASTResultHspScoreCmp
148  *
149  * Revision 6.432  2005/01/21 19:41:04  camacho
150  * Initialize variables
151  *
152  * Revision 6.431  2005/01/10 18:52:28  coulouri
153  * fixes from morgulis to allow concatenation of >255 queries in [t]blastn
154  *
155  * Revision 6.430  2004/12/29 13:26:28  madden
156  * One hit extension fixes so that:
157  * 1.) it is no longer iterative; now a left extension is performed and then a right extension.
158  * 2.) the left extension now stops when the score has dropped by xdrop, the right when the score goes to zero.
159  * 3.) fix one hit stopping criteria so that it is like two hit criteria.
160  * .
161  *
162  * Revision 6.429  2004/12/20 15:22:16  camacho
163  * Calculate kbp_ideal values rather than loading them from pre-computed values
164  *
165  * Revision 6.428  2004/12/14 14:07:54  madden
166  * Fix typo in if statement
167  *
168  * Revision 6.427  2004/11/30 16:33:16  dondosha
169  * Do not subtract starting offset in AdjustOffsetsInMaskLoc, because this is done in other functions after lower case mask is merged with filter mask
170  *
171  * Revision 6.426  2004/11/23 21:21:15  coulouri
172  * remove dead code, eliminate compiler warnings
173  *
174  * Revision 6.425  2004/11/22 15:43:24  dondosha
175  * Call AdjustOffsetsInMaskLoc for the options query_lcase_mask field, not parameters, to avoid leaving pointer to freed memory
176  *
177  * Revision 6.424  2004/11/19 13:22:05  madden
178  * Remove no_check_score completely (from Mike Gertz)
179  *
180  * Revision 6.423  2004/11/04 17:23:11  madden
181  * Fix for tblastn searches, do not mix HSPs from separate frames
182  *
183  * Revision 6.422  2004/11/04 15:51:55  bealer
184  * - bl2seq should use dblen as average length if database is not available.
185  *
186  * Revision 6.421  2004/11/01 14:07:06  madden
187  *    - In CalculateSecondCutoffScore use the number of starting points,
188  *      rather than the maximum size of the gap, when calculating the
189  *      cutoffs.
190  *
191  *      Recently, the meaning of search->pbp->gap_size was changed.
192  *      Previously, it represented the maximum number of permitted
193  *      starting points; now it represents the maximum permitted gap.
194  *      The CalculateSecondCutoffScore was not updated to reflect the new
195  *      meaning.  (The algo/blast/code was appropriately updated.)
196  *
197  *    - Remove the BlastReapHitlistByScore routine, and a call to the
198  *      routine in BLASTPerformFinalSearch.
199  *
200  * Revision 6.420  2004/10/25 18:30:21  papadopo
201  * From Michael Gertz:
202  * 1. Change BlastNtWordExtend to only terminate an ungapped alignment
203  *    if the running score fails the X-drop criterion, *not* if the score
204  *    becomes zero
205  * 2. Change BlastNtWordExtend to call BlastSaveCurrentHsp only for an
206  *    ungapped alignment, since it would choose an incorrect start point
207  *    for a gapped alignment
208  *
209  * Revision 6.419  2004/10/18 13:01:54  madden
210  * Changes from Mike Gertz:
211  *         - In xsum_compare_hsps change the comparison tests so that nil
212  *           HSPs are less than any non-nil HSP.  Previously, this
213  *           function would return 0 if either HSP was nil, which would
214  *           result in sort routines terminating before the non-nil HSPs
215  *           in the list were fully sorted.
216  *
217  *         - In rev_compare_hsps_cfj, reversed the order of the
218  *           comparison on query.frame to make the sort order consistent
219  *           with the sort used in algo/blast/core/link_hsps.c.
220  *
221  * Revision 6.418  2004/10/07 13:07:06  madden
222  * Cast int to FloatHi to prevent wrap-around
223  *
224  * Revision 6.417  2004/09/30 12:10:19  madden
225  * Add function BlastReapHitlistByScore, use on gapped tblastn and blastx HSPs that do not achieve a high enough score for continued processing
226  *
227  * Revision 6.416  2004/09/28 15:59:40  papadopo
228  * Items 1 and 2 of version 6.414 were mistakenly left out
229  *
230  * Revision 6.415  2004/09/28 15:52:16  papadopo
231  * From Michael Gertz:
232  * 1. Undo previous fix to ungapped PSSM wordfinder (not necessary)
233  * 2. Modify square-matrix ungapped wordfinder to avoid occasional
234  * 	incorrect choice of start offset for right extensions
235  * 3. Call BlastLinkHsps if and only if search->pbp->do_sum_stats is
236  * 	true; previously used the program number to decide
237  * 4. For ungapped blastx and tblastn, if longest_intron is not set
238  *         (i.e. = 0) or (longest_intron - 2)/3 is nonpositive, call
239  *         link_hsps. Otherwise call new_link_hsps.
240  * 5. For gapped blastx, tblastn or psitblastn, if longest_intron is
241  *         not set (i.e. = 0), set it to 122.  Then call new_link_hsps if
242  *         (longest_intron - 2)/3 is positive.  Otherwise turn off sum statistics.
243  * 6. In BlastLinkHsps, enabled the use of new_link_hsps for psitblastn.
244  * 7. Caused all routines for calculating the significance of multiple
245  * 	distinct alignments (BlastSmallGapSumE, BlastLargeGapSumE and
246  * 	BlastUnevenGapSumE) to use
247  *
248  * 	sum_{i in linked_set} (\lambda_i s_i - \ln K_i)
249  *
250  * 	as the weighted sum score. This change affects e-values in
251  * 	blastx and tblastx.
252  * 8. When computing normalized sum scores, use the ungapped values of
253  * 	(lambda, K) for ungapped alignments.
254  * 9. In SumHSPEvalue, for blastx, the subject_length must be divided by 3.
255  * 10. Pass the effective database size into BlastSmallGapSumE,
256  * 	BlastLargeGapSumE and BlastUnevenGapSumE.  The routines use this
257  * 	value in a simplified formula to compute the e-value of singleton sets.
258  * 11. Sort HSPs in new_link_hsps by normalized score, rather than score;
259  * 	for blastx, this places HSPs in the correct order of significance.
260  * 12. In new_link_hsps, set xsum field of every HSP to the appropriate
261  * 	value for a singleton set before doing any linking.
262  * 13. In both link_hsps and new_link_hsps use normalized sum score,
263  * 	rather than raw sum score, everywhere when choosing linked sets
264  * 14. Delete code in new_link_hsps for finding splice junctions.
265  * 15. Delete some unused variables in link_hsps.
266  *
267  * Revision 6.412  2004/09/22 16:44:48  dondosha
268  * Assign frames in ungapped blastn before any attempt to link HSPs, not only before second linking
269  *
270  * Revision 6.411  2004/09/21 16:28:23  dondosha
271  * Make sure first change in previous revision is applied only to blastn
272  *
273  * Revision 6.410  2004/09/21 13:58:46  dondosha
274  * 1. Assign HSP contexts and subject frames before linking HSPs after
275  *    reevaluation with ambiguities for ungapped blastn - necessary to distinguish
276  *    HSPs from different strands;
277  * 2. Use ideal Karlin-Altschul parameters for RPS tblastn instead of those for a
278  *    fake protein.
279  *
280  * Revision 6.409  2004/09/15 18:33:23  papadopo
281  * From Michael Gertz: modify two-hit ungapped code to compute the correct end offset even if an extension to the right does not happen
282  *
283  * Revision 6.408  2004/08/27 16:11:18  dondosha
284  * Changes in new_link_hsps from Mike Gertz: adjust singleton sets e-values by gap decay divisor; use effective db length for sum e-value calculations
285  *
286  * Revision 6.407  2004/08/16 19:37:26  dondosha
287  * Enabled uneven gap HSP linking for blastx
288  *
289  * Revision 6.406  2004/05/21 13:53:37  dondosha
290  * Fix in BLASTMergeHitLists
291  *
292  * Revision 6.405  2004/04/28 14:37:06  madden
293  * Changes from Mike Gertz
294  *  - modified the link_hsps routine to apply the gap_prob parameter to
295  *     the result of BlastSmallGapSumE and BlastLargeGapSumE.
296  *   - further modified link_hsps to use BlastGapDecayDivisor to weight
297  *     tests based on multiple collections of HSPs.
298  *   - removed all reference to gap_prob from the new_link_hsps.
299  *   - further modified new_link_hsps to use BlastGapDecayDivisor to weight
300  *     tests based on multiple collections of HSPs.
301  *
302  * Revision 6.404  2004/04/20 14:55:47  morgulis
303  * 1. Fixed query offsets in results when -B option is used.
304  * 2. Fixes for lower case masking handling with -B option.
305  *
306  * Revision 6.403  2004/04/13 21:03:30  madden
307  * Use ignore_gilist Boolean to determine whether gilist lookup should occur
308  *
309  * Revision 6.402  2004/03/31 17:58:51  papadopo
310  * Mike Gertz' changes for length adjustment calculations
311  *
312  * Revision 6.401  2004/03/22 15:35:39  dondosha
313  * 1. Do not allow cutoff score for saving HSPs to be smaller than gap trigger;
314  * 2. When merging hitlists with a restriction on number of HSPs, keep best
315  *    scoring ones.
316  *
317  * Revision 6.400  2004/02/26 15:52:29  papadopo
318  * Mike Gertz' modifications to unify handling of gapped Karlin blocks between protein and nucleotide searches
319  *
320  * Revision 6.399  2004/02/24 14:07:00  camacho
321  * Use approximate sequence length calculation for entrez-limited
322  * nucleotide blast databases.
323  *
324  * Revision 6.398  2004/02/03 17:54:16  dondosha
325  * Correction to revision 6.391 in function BlastGetDbChunk
326  *
327  * Revision 6.397  2004/01/06 22:37:10  dondosha
328  * Use BLAST_HSPfree function
329  *
330  * Revision 6.396  2003/12/29 15:42:46  coulouri
331  * tblastn query concatenation fixes from morgulis
332  *
333  * Revision 6.395  2003/12/12 16:01:23  madden
334  * Change to signature of BlastCutoffs, remove BlastCutoffs_simple
335  *
336  * Revision 6.394  2003/12/10 17:05:27  dondosha
337  * Added function ReevaluateScoreWithAmbiguities to reevaluate score for one HSP; use it after greedy traceback
338  *
339  * Revision 6.393  2003/11/19 18:09:13  dondosha
340  * Use consistent rounding in length adjustment calculation
341  *
342  * Revision 6.392  2003/11/10 20:15:29  dondosha
343  * Bug fix in BLASTMergeHsps
344  *
345  * Revision 6.391  2003/10/23 17:46:17  dondosha
346  * Fix in BlastGetDbChunk for looking up ordinal ids within a range
347  *
348  * Revision 6.390  2003/08/08 16:36:21  dondosha
349  * 1. Treat final_db_seq as 1 beyond the final sequence; 0 is an exception, meaning end of database.
350  * 2. Added more meaningful error message when query length is less than wordsize.
351  *
352  * Revision 6.389  2003/05/30 17:20:10  coulouri
353  * add rcsid
354  *
355  * Revision 6.388  2003/05/14 20:35:58  camacho
356  * Allow searching empty databases
357  *
358  * Revision 6.387  2003/05/13 16:02:53  coulouri
359  * make ErrPostEx(SEV_FATAL, ...) exit with nonzero status
360  *
361  * Revision 6.386  2003/05/12 12:23:43  camacho
362  * Sanity check for number of sequences & db length
363  *
364  * Revision 6.385  2003/04/23 15:15:36  camacho
365  * Moved reading of gi list to readdb
366  *
367  * Revision 6.384  2003/03/24 19:42:13  madden
368  * Changes to support query concatenation for blastn and tblastn
369  *
370  * Revision 6.383  2003/03/14 22:33:44  dondosha
371  * Do not increase preliminary hitlist size for ungapped search
372  *
373  * Revision 6.382  2003/03/06 19:10:42  madden
374  * Allow search->pbp->process_num to be > 1 if MT enabled
375  *
376  * Revision 6.381  2003/03/05 21:30:24  dondosha
377  * Fix in BlastMakeCopyQueryDNAP for single-strand OOF search
378  *
379  * Revision 6.380  2002/12/24 14:12:03  dondosha
380  * Removed accidental duplicate lines
381  *
382  * Revision 6.379  2002/12/10 23:13:22  bealer
383  * Fix do_the_blast_run and BlastGetDbChunk to calculate beginning and ending
384  * sequence numbers correctly.
385  * Fix BlastGetDbChunk to use precise start and end points, not nearest
386  * multiples of 32.
387  * Fix do_the_blast_run and BlastGetDbChunk to handle mixed oidlist / real db
388  * multiple database scenarios.
389  *
390  * Revision 6.378  2002/12/04 22:39:51  bealer
391  * Undo previous set of changes.
392  *
393  * Revision 6.377  2002/11/25 19:53:34  bealer
394  * Remove extraneous commented code.
395  *
396  * Revision 6.376  2002/11/25 19:50:26  bealer
397  * Prevent extra work by BlastGetDbChunk when OID lists are used.
398  *
399  * Revision 6.375  2002/11/13 18:03:10  dondosha
400  * Correction in BlastReevaluateWithAmbiguities
401  *
402  * Revision 6.374  2002/11/08 14:58:43  kans
403  * first argument to NlmReadMFILE must be cast as Uint1Ptr - Mac compiler picked up this inconsistency with the prototype
404  *
405  * Revision 6.373  2002/11/07 21:06:15  camacho
406  * Made GetGisFromFile work even without mmap
407  *
408  * Revision 6.372  2002/11/04 22:55:56  dondosha
409  * For blastn, calculate number of identities in BlastReevaluateWithAmbiguities
410  *
411  * Revision 6.371  2002/10/28 21:44:03  madden
412  * Added comments about gap-free extensions
413  *
414  * Revision 6.370  2002/09/18 20:23:19  camacho
415  * Added BLASTCalculateSearchSpace
416  *
417  * Revision 6.369  2002/09/11 20:46:25  camacho
418  * Removed deprecated BlastSeqIdListPtr code
419  *
420  * Revision 6.368  2002/08/30 18:56:02  dondosha
421  * Made BlastMakeTempProteinBioseq and HackSeqLocId public: needed for Cn3D
422  *
423  * Revision 6.367  2002/08/30 15:42:48  dondosha
424  * In blastn, use ewp structure only for the first context
425  *
426  * Revision 6.366  2002/08/22 13:39:45  camacho
427  * Close the header and sequence files only if allocated
428  *
429  * Revision 6.365  2002/08/07 21:37:47  camacho
430  * Do not remove the search block prematurely in do_gapped_blast_search
431  *
432  * Revision 6.364  2002/08/06 17:33:50  madden
433  * Fix return value problem
434  *
435  * Revision 6.363  2002/07/19 17:55:47  dondosha
436  * 1.Return 0 status from BLASTPerformFinalSearch when database sequence has 0 length;
437  * 2. Do not destroy search block too early.
438  *
439  * Revision 6.362  2002/07/15 18:53:27  camacho
440  * Small fix to previous commit
441  *
442  * Revision 6.361  2002/07/14 17:18:13  camacho
443  * Fixed small memory leak in do_blast_search/do_gapped_blast_search
444  *
445  * Revision 6.360  2002/07/12 18:02:55  dondosha
446  * Do not call AdjustOffsetsInMaskLoc if no lower case mask
447  *
448  * Revision 6.359  2002/07/12 16:06:26  dondosha
449  * Adjust offsets and remove unneeded lower case mask locations when query is a subsequence
450  *
451  * Revision 6.358  2002/06/27 13:01:26  kans
452  * BlastGetVirtualOIDList is LIBCALL
453  *
454  * Revision 6.357  2002/06/26 00:56:28  camacho
455  *
456  * 1. Fixed bug when searching a mixture of real and mask databases.
457  * 2. Clean up of code that calculates the number of sequences and database
458  *    length.
459  *
460  * Revision 6.356  2002/06/25 16:43:45  dondosha
461  * Get out from all search loops if bad status returned, meaning process ran out of memory
462  *
463  * Revision 6.355  2002/06/25 13:11:22  madden
464  * Fix UMR for status in do_gapped_blast_search
465  *
466  * Revision 6.354  2002/06/21 21:49:10  camacho
467  * Removed references to thr_info->blast_seqid_list in BlastGetDbChunk
468  *
469  * Revision 6.353  2002/06/12 15:43:09  dondosha
470  * Potential uninitialized variable bug fixed
471  *
472  * Revision 6.352  2002/06/12 15:33:25  dondosha
473  * Corrected integer types of the variable holding return status in 2 functions
474  *
475  * Revision 6.351  2002/06/11 20:40:04  dondosha
476  * Correction to previous change
477  *
478  * Revision 6.350  2002/06/11 14:44:45  dondosha
479  * Return status from some functions instead of search block pointer
480  *
481  * Revision 6.349  2002/06/05 15:30:34  coulouri
482  * Move signal handling to blastsrv.c
483  *
484  * Revision 6.348  2002/05/20 22:49:10  dondosha
485  * Fix for the Mega BLAST case when database sequence is split, and an HSP is accidentally extended across the boundary to a completely masked query
486  *
487  * Revision 6.347  2002/05/15 19:51:01  dondosha
488  * Do a sanity check for the final db sequence parameter
489  *
490  * Revision 6.346  2002/04/23 16:01:27  madden
491  * Fix for ungapped search of arbitrary matrix
492  *
493  * Revision 6.345  2002/04/23 15:40:10  madden
494  * Fix for effective length change and ungapped blast
495  *
496  * Revision 6.344  2002/04/19 21:22:30  madden
497  * Added protection for matrices that are only empty strings
498  *
499  * Revision 6.343  2002/04/18 12:07:05  madden
500  * Check for Selenocysteine in Bioseq, replace with X
501  *
502  * Revision 6.342  2002/04/17 17:30:15  madden
503  * Call getAlphaBeta only for gapped alignments
504  *
505  * Revision 6.341  2002/04/16 15:42:15  madden
506  * Save mask1 for lookup table hashing only (change for neighboring)
507  *
508  * Revision 6.340  2002/04/04 21:19:15  dondosha
509  * Corrections for megablast with non-greedy extensions
510  *
511  * Revision 6.339  2002/03/26 21:20:50  dondosha
512  * 1. Make hitlist size larger for preliminary gapped alignment
513  * 2. Pass readdb structure to megablast set up if it is already initialized
514  *
515  * Revision 6.338  2002/03/26 16:46:40  madden
516  * Move calculation of effective lengths to BlastCalculateEffectiveLengths
517  *
518  * Revision 6.337  2002/03/06 18:34:31  dondosha
519  * Pass the filtered locations back from the megablast engine to use in formatting
520  *
521  * Revision 6.336  2002/02/27 22:39:00  dondosha
522  * Fixed bug in splitting long database sequences for translated searches
523  *
524  * Revision 6.335  2002/02/27 17:43:20  dondosha
525  * Made effective database length option work properly
526  *
527  * Revision 6.334  2002/02/26 22:25:20  dondosha
528  * Return error as soon as it is found that matrix name is not supported
529  *
530  * Revision 6.333  2002/02/26 17:37:40  dondosha
531  * Fixed bug in BlastNtWordFinder for word sizes > 12
532  *
533  * Revision 6.332  2002/02/26 15:03:13  dondosha
534  * Accidental newline in sprintf removed
535  *
536  * Revision 6.331  2002/02/25 23:26:57  dondosha
537  * Changed error to warning if no letters to be indexed just on one context
538  *
539  * Revision 6.330  2002/01/04 22:01:33  coulouri
540  * Fixed BlastSetLimits() to work under linux
541  *
542  * Revision 6.329  2002/01/04 20:16:12  dondosha
543  * Correction for single strand blastx with OOF gapping
544  *
545  * Revision 6.328  2001/12/28 20:38:40  dondosha
546  * Moved Mega BLAST related parameters into a separate structure
547  *
548  * Revision 6.327  2001/12/17 17:31:35  madden
549  * Fix memory leaks
550  *
551  * Revision 6.326  2001/12/14 21:04:31  madden
552  * Reinit start to zero for every frame
553  *
554  * Revision 6.325  2001/12/10 23:04:19  dondosha
555  * Corrected how number of db sequences is set when gi list exists
556  *
557  * Revision 6.324  2001/11/23 21:11:16  dondosha
558  * Correction to previous change
559  *
560  * Revision 6.323  2001/11/23 19:57:55  dondosha
561  * Correction for bl2seq related to recent changes in megablast
562  *
563  * Revision 6.322  2001/11/14 23:39:31  dondosha
564  * Switched return value for BlastNtWordUngappedExtend
565  *
566  * Revision 6.321  2001/11/13 18:17:26  dondosha
567  * Added BlastNtWordUngappedExtend for use in Mega BLAST
568  *
569  * Revision 6.320  2001/09/21 14:42:08  dondosha
570  * Correction of previous fix in BlastReapPartialHitlistByEvalue for non-megablast programs
571  *
572  * Revision 6.319  2001/09/20 14:39:15  madden
573  * Fix for non-blastn programs in BlastReapPartialHitlistByEvalue
574  *
575  * Revision 6.318  2001/09/17 16:33:39  dondosha
576  * Bug fix in BlastReapPartialHitlistByEvalue
577  *
578  * Revision 6.317  2001/09/11 14:28:30  madden
579  * Added timed_out Boolean to SearchBlk
580  *
581  * Revision 6.316  2001/09/07 14:46:43  dondosha
582  * Roll back removal of threshold_first from functions and structures
583  *
584  * Revision 6.315  2001/09/06 20:24:33  dondosha
585  * Removed threshold_first
586  *
587  * Revision 6.314  2001/08/10 14:55:55  madden
588  * Add pv_array for multiple hits blastn
589  *
590  * Revision 6.313  2001/07/24 19:50:32  dondosha
591  * Do not create a star_proc thread if there is no tick_proc
592  *
593  * Revision 6.312  2001/07/20 18:52:25  dondosha
594  * Removed unused code
595  *
596  * Revision 6.311  2001/07/18 19:24:17  madden
597  * Set options->dbseq_num if use_real_db TRUE
598  *
599  * Revision 6.310  2001/07/09 14:17:23  madden
600  * Fix PC-lint complaints from R. Williams
601  *
602  * Revision 6.309  2001/07/09 13:12:02  madden
603  * Removed unused variables
604  *
605  * Revision 6.308  2001/07/06 15:22:42  madden
606  * Correction for BLASTN
607  *
608  * Revision 6.307  2001/06/29 18:07:20  madden
609  * Fix problem with scalingFactor
610  *
611  * Revision 6.306  2001/06/28 13:42:09  madden
612  * Fixes to prevent overflow on number of hits reporting
613  *
614  * Revision 6.305  2001/06/27 17:46:33  madden
615  * Add mutex to protect number_of_pos_hits, found by H. Gabb at KAI
616  *
617  * Revision 6.304  2001/06/26 20:37:04  madden
618  * Fixes for realdb_done problem found by H. Gabb at KAI
619  *
620  * Revision 6.303  2001/06/25 16:03:12  madden
621  * Correctly set gapped_start for blastn
622  *
623  * Revision 6.302  2001/06/21 21:29:07  dondosha
624  * Fixed memory leaks: destroy all error returns, free private_slp
625  *
626  * Revision 6.301  2001/06/15 16:38:45  dondosha
627  * Correction to previous changes
628  *
629  * Revision 6.300  2001/06/14 22:09:14  dondosha
630  * Rearranged code for gi lists and oid masks processing to get rid of duplication
631  *
632  * Revision 6.299  2001/06/13 21:45:08  dondosha
633  * Search of multiple databases with gi files implemented
634  *
635  * Revision 6.298  2001/06/12 19:48:55  madden
636  * Introduce total_hsp_limit, check before making SeqAlign
637  *
638  * Revision 6.297  2001/05/25 19:34:17  vakatov
639  * Nested comment typo fixed
640  *
641  * Revision 6.296  2001/05/04 15:59:46  dondosha
642  * Function BlastFillQueryOffsets now has an extra argument for megablast use
643  *
644  * Revision 6.295  2001/05/03 21:48:28  dondosha
645  * Handle some cases when memory allocation fails
646  *
647  * Revision 6.294  2001/04/23 17:09:18  madden
648  * Use StringSave for gifile variable
649  *
650  * Revision 6.293  2001/04/16 16:37:01  madden
651  * Restore old length correction behavior for blastn
652  *
653  * Revision 6.292  2001/04/13 20:46:42  madden
654  * Changed edge effect correction in BLASTSetUpSearchInternalByLoc to use new method with  alpha and beta parameters from Altschul, Bundschuh, Olsen, Hwa, Nucleic Acids Research 29(2001), 351-361.
655  *
656  * Revision 6.291  2001/04/11 20:56:06  madden
657  * Added scalingFactor for rpsblast
658  *
659  * Revision 6.290  2001/04/04 20:31:16  dondosha
660  * Bug fix for blastx with a subsequence query
661  *
662  * Revision 6.289  2001/04/02 15:55:27  dondosha
663  * Check HSP frames when merging hitlists from split subject sequence
664  *
665  * Revision 6.288  2001/03/30 23:53:45  dondosha
666  * Correction in splitting long database sequences for tblastn
667  *
668  * Revision 6.287  2001/03/19 18:53:45  madden
669  * Added call to BlastSeqLocFillDoubleIntEx, changed call to BlastSeqLocFillDoubleIntRev
670  *
671  * Revision 6.286  2001/03/14 14:54:35  madden
672  * fix problem with partial translating query
673  *
674  * Revision 6.285  2001/03/12 21:38:59  dondosha
675  * Bug fix in database sequence splitting change
676  *
677  * Revision 6.284  2001/03/08 22:05:47  dondosha
678  * Split very long database sequences in all BLAST programs
679  *
680  * Revision 6.283  2001/03/07 14:09:17  madden
681  * Set multiple_hits depending on option block
682  *
683  * Revision 6.282  2001/03/06 22:02:32  dondosha
684  * Rolled back accidental change in BlastReevaluateWithAmbiguities
685  *
686  * Revision 6.281  2001/03/01 15:41:33  dondosha
687  * Added protection from infinite loop in new_link_hsps
688  *
689  * Revision 6.280  2001/01/24 21:55:53  dondosha
690  * Correction to previous change
691  *
692  * Revision 6.279  2001/01/24 20:51:49  dondosha
693  * Enabled splitting of the second sequence for 2 sequences with megablast
694  *
695  * Revision 6.278  2001/01/19 17:23:16  madden
696  * Optimization for 2-hit blastn
697  *
698  * Revision 6.277  2001/01/16 14:03:53  madden
699  * Enable gapped check for blastn immediately after finding hits
700  *
701  * Revision 6.276  2001/01/09 20:10:37  shavirin
702  * Added sorting of all hits in result_struct for every element in
703  * results. Added function RPSResultHspScoreCmp.
704  *
705  * Revision 6.275  2001/01/08 20:21:40  dondosha
706  * Adjust subject offset in the gap edit blocks if database sequence was split in megablast search
707  *
708  * Revision 6.274  2001/01/03 21:45:29  dondosha
709  * Fixed a memory leak - some edit blocks not freed in megablast
710  *
711  * Revision 6.273  2001/01/02 22:29:45  dondosha
712  * Assign virtual oidlist to the first non-whole database rdfp in the linked list
713  *
714  * Revision 6.272  2000/12/28 18:22:29  madden
715  * Fixes to BlastNtWordFinder_mh
716  *
717  * Revision 6.271  2000/12/27 16:51:17  dondosha
718  * When splitting database sequence for megablast, keep only significant HSPs from partial hitlists
719  *
720  * Revision 6.270  2000/12/26 17:50:46  dondosha
721  * Fixed bug in BLASTMergeHsps function for merging HSPs after splitting of a database sequence
722  *
723  * Revision 6.269  2000/12/21 17:37:24  dondosha
724  * Fixed bug with minus-strand blastn search
725  *
726  * Revision 6.268  2000/12/20 15:44:01  madden
727  * Better error message if query is shorter than wordsize
728  *
729  * Revision 6.267  2000/12/18 20:38:55  shavirin
730  * Removed include <time.h> before <ncbi.h>.
731  *
732  * Revision 6.266  2000/12/07 17:45:13  dondosha
733  * Use actual subject sequence length in GreedyAlignMemAlloc for 2 Sequences engine
734  *
735  * Revision 6.265  2000/12/04 18:51:23  madden
736  * Fix memory leaks
737  *
738  * Revision 6.264  2000/11/29 16:58:16  dondosha
739  * Small fix to previous revision
740  *
741  * Revision 6.263  2000/11/29 16:29:31  dondosha
742  * For megablast, allow splitting of long subject sequences and merging hitlists
743  *
744  * Revision 6.262  2000/11/17 17:51:59  dondosha
745  * Removed is_megablast argument from BLASTSetUpSearchWithReadDbInternalEx since it is part of options
746  *
747  * Revision 6.261  2000/11/13 20:38:48  madden
748  * Fix for zero length db sequence in ungapped blast
749  *
750  * Revision 6.260  2000/11/09 14:59:38  dondosha
751  * Longest intron length in options set in nucleotide coordinates
752  *
753  * Revision 6.259  2000/11/08 22:21:32  dondosha
754  * Enabled new tblastn by adding a longest_intron option
755  *
756  * Revision 6.258  2000/11/07 16:30:24  madden
757  * Introduce intermediate score (before linking of HSPs) for blastx and tblastn
758  *
759  * Revision 6.257  2000/11/03 20:13:55  dondosha
760  * Do not call readdb_get_sequence_ex from new_link_hsps for two sequences BLAST
761  *
762  * Revision 6.256  2000/11/01 16:25:58  madden
763  * Changes from Futamura for psitblastn
764  *
765  * Revision 6.255  2000/11/01 00:05:18  vakatov
766  * Added missing "LIBCALL"
767  *
768  * Revision 6.254  2000/10/31 16:30:56  shavirin
769  * Function BLASTSetUpSearchInternalByLoc became external.
770  *
771  * Revision 6.253  2000/10/30 16:51:04  shavirin
772  * Changed function with creation temporary bioseqs for SEG filtering.
773  *
774  * Revision 6.252  2000/10/26 18:45:58  dondosha
775  * Check if gi list file is provided from the db alias
776  *
777  * Revision 6.251  2000/10/24 19:05:45  dondosha
778  * Moved function UniqueLocalId to sequtil.c
779  *
780  * Revision 6.250  2000/10/06 21:36:02  dondosha
781  * Do not multiply window size by 3 for subject in new_link_hsps
782  *
783  * Revision 6.249  2000/10/06 16:36:57  shavirin
784  * Correctly closed file with gi list in the function GetGisFromFile().
785  *
786  * Revision 6.248  2000/10/05 19:54:50  dondosha
787  * For Mega BLAST, call MegaBlastSaveCurrentHitlist instead of BlastSaveCurrentHitlist
788  *
789  * Revision 6.247  2000/09/28 15:05:59  dondosha
790  * Added splice junction search; corrected sum evalue calculation
791  *
792  * Revision 6.246  2000/09/28 14:27:52  madden
793  * Correct use of search space for linked hsps
794  *
795  * Revision 6.245  2000/09/18 16:04:38  madden
796  * No call to BlastFindWords if rpsblast
797  *
798  * Revision 6.244  2000/09/14 14:58:20  dondosha
799  * Further improvements with new tblastn (still not in the executable)
800  *
801  * Revision 6.243  2000/09/12 16:11:31  dondosha
802  * Changed window size, plus some bug fixes for new_link_hsps
803  *
804  * Revision 6.242  2000/09/01 18:25:10  dondosha
805  * Pass start and length to BlastFindWords, not start and end
806  *
807  * Revision 6.241  2000/09/01 13:47:39  shavirin
808  * Fixed error and typecast warnings from Windows NT compilation.
809  *
810  * Revision 6.240  2000/08/31 18:37:22  shavirin
811  * Added check for NULL in BlastMakeCopyQueryDNAP().
812  *
813  * Revision 6.239  2000/08/31 17:06:20  shavirin
814  * Added few OOF related functions to copy and delete query_dnap.
815  *
816  * Revision 6.238  2000/08/31 15:59:12  dondosha
817  * No need to call ReadDBFreeSharedInfo from do_the_blast_run
818  *
819  * Revision 6.237  2000/08/29 19:36:37  madden
820  * Do not lookup gis if gilist_already_calculated is set
821  *
822  * Revision 6.236  2000/08/29 18:09:34  dondosha
823  * Adjust the reverse strand offsets for non-megablast blastn in BlastSaveCurrentHitlist
824  *
825  * Revision 6.235  2000/08/25 22:41:49  dondosha
826  * Do reevaluation of score with ambiguities for megablast
827  *
828  * Revision 6.234  2000/08/23 18:48:44  madden
829  * Use BlastKarlinBlkGappedCalcEx in place of BlastKarlinBlkGappedCalc
830  *
831  * Revision 6.233  2000/08/18 20:12:28  dondosha
832  * Do not use search->query_id in megablast, use only qid_array
833  *
834  * Revision 6.232  2000/08/07 16:59:49  dondosha
835  * Correct construction of path for gi list file
836  *
837  * Revision 6.231  2000/08/03 17:50:37  dondosha
838  * Check HSPs for going beyond ends of query in megablast
839  *
840  * Revision 6.230  2000/08/02 15:26:09  dondosha
841  * For megablast compute search space depending on query when getting evalue
842  *
843  * Revision 6.229  2000/07/25 16:52:49  shavirin
844  * Corrected function BlastCreateQueryDNAP().
845  *
846  * Revision 6.228  2000/07/24 16:12:05  hurwitz
847  * made definition of BLASTSetUpSearchWithReadDbInternalEx match the one in blastpri.h
848  *
849  * Revision 6.227  2000/07/21 21:26:43  dondosha
850  * Added BLASTSetUpSearchWithReadDbInternalEx with Boolean argument is_megablast
851  *
852  * Revision 6.226  2000/07/18 22:32:38  shavirin
853  * Adjusted space allocated for DNA-P query sequence
854  *
855  * Revision 6.225  2000/07/17 14:17:10  shavirin
856  * Added new function BlastCreateQueryDNAP() and OOF_TranslateToDNAP() and
857  * support for Out of frame gap algorithm.
858  *
859  * Revision 6.224  2000/07/12 13:36:29  shavirin
860  * Removed last NULL parameter from MegaBlastSetUpSearchInternalByLoc().
861  *
862  * Revision 6.223  2000/07/11 17:16:19  shavirin
863  * Added new parameter is_ooframe for Out-Of-Frame gapping algorithm.
864  *
865  * Revision 6.222  2000/06/22 22:28:07  dondosha
866  * Only look at HSPs up to hspcnt_max in BlastSaveCurrentHitlist - this allows not to use MemNew when initializing hsp_array
867  *
868  * Revision 6.221  2000/06/22 14:08:20  madden
869  * Fix bug in BlastWordExtend_prelim if word-hit is at end of sequence
870  *
871  * Revision 6.220  2000/06/08 20:34:10  madden
872  * add explode_seqids option to show all ids in a defline
873  *
874  * Revision 6.219  2000/05/25 21:03:56  dondosha
875  * In BlastSaveCurrentHitlist assign hspcnt for result hitlist correctly
876  *
877  * Revision 6.218  2000/05/24 19:48:06  dondosha
878  * Moved initialization of qid_array in megablast to search set-up
879  *
880  * Revision 6.217  2000/05/19 19:36:18  madden
881  * Fix for longer words in BlastNtWordFinder, do not call BlastNTPreliminaryGappedScore
882  *
883  * Revision 6.216  2000/05/17 17:13:36  dondosha
884  * Removed some unused variables
885  *
886  * Revision 6.215  2000/05/16 19:59:24  madden
887  * Do not set ignore_small_gaps to TRUE
888  *
889  * Revision 6.214  2000/05/12 19:42:29  dondosha
890  * Use array instead of linked list of query ids in megablast
891  *
892  * Revision 6.213  2000/05/12 18:53:25  shavirin
893  * Fixed memory leak with OIDList.
894  *
895  * Revision 6.212  2000/05/11 18:02:23  shavirin
896  * Minor change for using gi_list together with oid-databases.
897  *
898  * Revision 6.211  2000/05/09 19:42:49  shavirin
899  * Fixed in BlastGetDbChunk() no-mutex regular database case.
900  *
901  * Revision 6.210  2000/05/03 17:08:26  shavirin
902  * Fixed minor bug in the function BLASTSetUpSearchWithReadDbInternal().
903  *
904  * Revision 6.209  2000/05/01 21:24:54  dondosha
905  * Changed greedy_gapped_align to MegaBlastGreedyAlign
906  *
907  * Revision 6.208  2000/04/28 17:51:49  shavirin
908  * Replaced define RPS_BLAST with checking parameter is_rps_blast.
909  *
910  * Revision 6.207  2000/04/25 19:05:13  dondosha
911  * Before search assign db_chunk_last to first_db_seq
912  *
913  * Revision 6.206  2000/04/24 16:43:51  dondosha
914  * Call BlastReapHitlistByEvalue in MegaBlast if hitlists are saved
915  *
916  * Revision 6.205  2000/04/20 15:12:32  dondosha
917  * Bug fix for minus-strand only search - do not try to concatenate second strand to first
918  *
919  * Revision 6.204  2000/04/11 12:47:08  madden
920  * Proper casting to Int8
921  *
922  * Revision 6.203  2000/04/10 20:01:24  dondosha
923  * Fill both strands mask locations in a one location list for blastn
924  *
925  * Revision 6.202  2000/04/10 17:16:44  madden
926  * Make search_sp Int8 to prevent overflow
927  *
928  * Revision 6.201  2000/04/10 15:24:24  dondosha
929  * Enabled use of MegaBlast for BlastTwoSequences
930  *
931  * Revision 6.200  2000/04/07 20:19:45  dondosha
932  * Do not call BlastReapHitlistByEvalue for megablast
933  *
934  * Revision 6.199  2000/04/07 16:43:25  dondosha
935  * Assign dbseq_num to min of actual db size and gilist size
936  *
937  * Revision 6.198  2000/04/07 13:11:56  shavirin
938  * Checked for queue_callback != NULL.
939  *
940  * Revision 6.197  2000/04/06 13:13:33  shavirin
941  * Changed sequence to post semaphore info for internal queueing.
942  *
943  * Revision 6.196  2000/04/04 20:48:21  dondosha
944  * Fixed a memory leak in saving hitlists for MegaBlast
945  *
946  * Revision 6.195  2000/04/04 16:16:59  dondosha
947  * Fixed some memory leaks in MegaBlast traceback
948  *
949  * Revision 6.194  2000/04/03 21:21:44  dondosha
950  * Assign is_neighboring parameter from option
951  *
952  * Revision 6.193  2000/03/31 21:14:24  dondosha
953  * Changed some names related to MegaBlast
954  *
955  * Revision 6.192  2000/03/31 16:50:51  dondosha
956  * Sort hsps and remove redundant when saving hitlist in MegaBlast
957  *
958  * Revision 6.191  2000/03/30 21:45:04  madden
959  * Add call to BLASTResultHitlistFreeEx
960  *
961  * Revision 6.190  2000/03/29 22:19:43  dondosha
962  * BlastSaveCurrentHitlist adjusts query offsets for blastn; creates seqaligns for MegaBlast
963  *
964  * Revision 6.189  2000/03/27 16:46:22  madden
965  * Moved call to BlastFillQueryOffsets to BLASTSetUpSearchInternalByLoc
966  *
967  * Revision 6.188  2000/03/23 20:51:15  dondosha
968  * Set dbseq_num to gi_list_total if search space is not recalculated and gi_list exists
969  *
970  * Revision 6.187  2000/03/22 18:08:59  dondosha
971  * Free rdfp->shared_info in single threaded case the same way as in multithreaded after the search
972  *
973  * Revision 6.186  2000/03/14 21:01:16  dondosha
974  * Call BlastTickProc even when gi_list is set
975  *
976  * Revision 6.185  2000/03/13 21:11:35  dondosha
977  * Check options parameters use_real_db_size and sort_gi_list when dealing with gi_list
978  *
979  * Revision 6.184  2000/03/03 18:02:05  shavirin
980  * Added support for lower-case character filtering in "blastx", "tblastx"
981  * and translated RPS Blast.
982  *
983  * Revision 6.183  2000/03/03 17:41:09  egorov
984  * fix memory leak with oidlist
985  *
986  * Revision 6.182  2000/03/02 21:24:16  shavirin
987  * Checked for SEQLOC_PACKED_INT in blastMergeFilterLocs()
988  *
989  * Revision 6.181  2000/03/02 18:30:46  dondosha
990  * Minor bug fix in BlastSaveCurrentHsp for blastn
991  *
992  * Revision 6.180  2000/03/02 17:11:01  dondosha
993  * Fixed bug with one strand search option for blastn
994  *
995  * Revision 6.179  2000/03/01 21:40:53  shavirin
996  * Added code to filter lower-case character regions (except blastx and tblastx)
997  *
998  * Revision 6.178  2000/02/29 18:17:24  shavirin
999  * Variable query_dna_mask changed to query_lcase_mask.
1000  *
1001  * Revision 6.177  2000/02/29 18:09:36  dondosha
1002  * Call BlastFillQueryOffsets in BLASTSetUpSearchEx
1003  *
1004  * Revision 6.176  2000/02/23 20:56:51  dondosha
1005  * Returning strand concatenation for blastn with bug fixes
1006  *
1007  * Revision 6.175  2000/02/18 15:30:36  shavirin
1008  * Added parameter query_dna_mask into options and parameters.
1009  *
1010  * Revision 6.174  2000/02/17 21:23:08  shavirin
1011  * Added parameter is_rps_blast.
1012  *
1013  * Revision 6.173  2000/02/17 19:02:08  shavirin
1014  * Removed all references to obsolete theCacheSize variable.
1015  *
1016  * Revision 6.172  2000/02/17 18:29:02  shavirin
1017  * Added function DefineToFrame().
1018  *
1019  * Revision 6.171  2000/02/16 21:47:45  shavirin
1020  * Fixed memory leaks in the function BlastReapHitlistByEvalue ().
1021  *
1022  * Revision 6.170  2000/02/15 21:02:00  shavirin
1023  * Added support to filter DNA sequence in translated RPS Blast.
1024  *
1025  * Revision 6.169  2000/02/15 19:17:29  shavirin
1026  * Added filter_string to Parameters block.
1027  *
1028  * Revision 6.168  2000/02/14 16:15:40  madden
1029  * Revert to 6.166
1030  *
1031  * Revision 6.167  2000/02/11 20:41:46  dondosha
1032  * Search on two query strands concatenated in blastn
1033  *
1034  * Revision 6.166  2000/02/09 19:40:00  madden
1035  * Fix purify problems in link_hsps
1036  *
1037  * Revision 6.165  2000/02/09 19:35:36  madden
1038  * Changed GetGisFromFile to also read binary gilists
1039  *
1040  * Revision 6.164  2000/02/03 21:34:07  dondosha
1041  * Fixed bug in setting extra_bytes_needed
1042  *
1043  * Revision 6.163  2000/02/02 20:01:57  madden
1044  * Added LIBCALLBACK to a callback
1045  *
1046  * Revision 6.162  2000/02/02 18:21:31  madden
1047  * Joerg optimizations for link_hsps
1048  *
1049  * Revision 6.161  2000/02/02 16:56:23  dondosha
1050  * Do not call BlastSaveCurrentHitlist if handle_results callback set
1051  *
1052  * Revision 6.160  2000/02/02 15:05:42  dondosha
1053  * Removed call to ReapHitlistByContext, erroneously included in previous version
1054  *
1055  * Revision 6.159  2000/02/01 22:37:05  dondosha
1056  * Call the new routine BlastReapHitlistByContext only when greedy alignment option is set
1057  *
1058  * Revision 6.158  2000/02/01 21:47:04  dondosha
1059  * Added greedy basic gapped alignment option
1060  *
1061  * Revision 6.157  2000/01/14 15:17:13  madden
1062  * Set no_check_score in pbp
1063  *
1064  * Revision 6.156  2000/01/13 18:10:41  madden
1065  * Fix problem with incorrect stat values for blastn and missing hits
1066  *
1067  * Revision 6.155  2000/01/13 14:27:04  madden
1068  * Fixed other problem in BlastWordFinder_contig()
1069  *
1070  * Revision 6.154  2000/01/12 18:52:23  shavirin
1071  * Fixed lookup_pos in BlastWordFinder_contig().
1072  *
1073  * Revision 6.153  2000/01/11 18:36:25  shavirin
1074  * Added functions that handle the dynamic lookup table.
1075  *
1076  * Revision 6.152  2000/01/11 15:32:46  dondosha
1077  * Fixed memory leaks in opening shared header and sequence file memory maps
1078  *
1079  * Revision 6.151  2000/01/04 22:52:25  madden
1080  * Restored code for using real db size
1081  *
1082  * Revision 6.150  1999/12/31 14:23:18  egorov
1083  * Add support for using mixture of real and mask database with gi-list files:
1084  * 1. Change logic of creating rdfp list.
1085  * 2. BlastGetDbChunk gets real databases first, then masks.
1086  * 3. Proper calculation of database sizes using alias files.
1087  * 4. Change to CommonIndex to support using of mask databases.
1088  * 5. Use correct gis in formatted output (BlastGetAllowedGis()).
1089  * 6. Other small changes
1090  *
1091  * Revision 6.149  1999/12/29 19:03:59  shavirin
1092  * Relative pointers in BlastWordFinder_mh_contig() updated to 8 byte pointers
1093  *
1094  * Revision 6.148  1999/12/29 18:57:03  shavirin
1095  * Added possibility to use relative pointers in BlastWordFinder_mh_contig().
1096  *
1097  * Revision 6.147  1999/12/22 21:55:38  dondosha
1098  * Close header and sequence files when search is done
1099  *
1100  * Revision 6.146  1999/12/21 20:05:48  egorov
1101  * Change logic of generating mask file when we have a gi-list file,
1102  * real database and mask database.  In fact, this is a big bug fix.
1103  *
1104  * Revision 6.145  1999/12/16 19:17:34  egorov
1105  * Code cleanup
1106  *
1107  * Revision 6.144  1999/12/02 14:39:35  egorov
1108  * When both mask and gi_list are specified, do not overwrite calculated
1109  * number of sequences and database length with values from alias file.
1110  *
1111  * Revision 6.143  1999/11/30 19:00:49  madden
1112  * Added Nlm_SwapUint4 calls for the ordinal ID list
1113  *
1114  * Revision 6.142  1999/11/26 22:26:13  madden
1115  * Added BlastNT functions for nucl. extensions
1116  *
1117  * Revision 6.141  1999/11/24 21:43:35  madden
1118  * Added Nlm_SwapUint4 call to make database masks work with both big and small endian systems
1119  *
1120  * Revision 6.140  1999/11/12 20:57:39  shavirin
1121  * Added parameter use_best_align into BLAST_ParameterBlkPtr
1122  *
1123  * Revision 6.139  1999/10/27 21:33:00  madden
1124  * Use housekeeping threads only for larger sequences
1125  *
1126  * Revision 6.138  1999/10/26 20:45:18  madden
1127  * Add use_real_db_size option
1128  *
1129  * Revision 6.137  1999/10/19 17:41:20  madden
1130  * Ensure that ThreadJoin is called on every thread created
1131  *
1132  * Revision 6.136  1999/10/14 17:57:44  madden
1133  * Fix for database size set by user, remove unused variables
1134  *
1135  * Revision 6.135  1999/10/12 19:34:08  madden
1136  * Call MutexDestroy on callback_mutex
1137  *
1138  * Revision 6.134  1999/10/08 17:39:57  egorov
1139  * Store input gi list to pick up correct definition for redundant sequences
1140  *
1141  * Revision 6.133  1999/10/05 18:16:06  shavirin
1142  * Functions tick_proc and get_db_chunk were renamed and became public.
1143  *
1144  * Revision 6.132  1999/10/05 17:42:53  shavirin
1145  * Removed global variables from blast.c
1146  *
1147  * Revision 6.131  1999/10/01 21:07:12  shavirin
1148  * Changed definition and adjusted function get_db_list().
1149  *
1150  * Revision 6.130  1999/09/28 20:14:32  madden
1151  * Joerg changes to minimize cache misses
1152  *
1153  * Revision 6.129  1999/09/22 21:54:08  egorov
1154  * remove debug info
1155  *
1156  * Revision 6.128  1999/09/22 21:03:55  egorov
1157  * Add mask DB stuff
1158  *
1159  * Revision 6.127  1999/09/16 16:54:23  madden
1160  * Changes to BlastNtWordFinder for long words
1161  *
1162  * Revision 6.126  1999/09/16 14:16:54  madden
1163  * Changed call to lookup_find_init
1164  *
1165  * Revision 6.125  1999/08/27 18:07:32  shavirin
1166  * Passed parameter decline_align from top to the engine.
1167  *
1168  * Revision 6.124  1999/08/26 14:55:15  madden
1169  * Fixed Int8 problem
1170  *
1171  * Revision 6.123  1999/08/25 13:11:16  madden
1172  * Roll back to rev 6.121
1173  *
1174  * Revision 6.121  1999/08/20 19:47:24  madden
1175  * Changed call to BlastSearchBlkNew(Extra), removed use of version array
1176  *
1177  * Revision 6.120  1999/08/06 18:46:13  madden
1178  * Fixed spelling of incompatible
1179  *
1180  * Revision 6.119  1999/06/07 18:28:20  beloslyu
1181  * NetBSD port
1182  *
1183  * Revision 6.118  1999/05/27 17:33:04  madden
1184  * Fixed Int2 (should have been Int4) problem
1185  *
1186  * Revision 6.117  1999/04/28 13:30:03  madden
1187  * Use BlastConstructErrorMessage for error messages
1188  *
1189  * Revision 6.116  1999/04/23 16:45:53  madden
1190  * call BQ_IncSemaphore as callback
1191  *
1192  * Revision 6.115  1999/04/22 16:45:29  shavirin
1193  * Added load-balancing function.
1194  *
1195  * Revision 6.114  1999/04/13 16:39:14  madden
1196  * Fixed problem if first context not plus strand
1197  *
1198  * Revision 6.113  1999/04/07 20:43:33  egorov
1199  * Fix a bug when ordinal_id == 0 was not allowed
1200  *
1201  * Revision 6.112  1999/04/01 21:42:45  madden
1202  * Fix memory leaks when gi list is used
1203  *
1204  * Revision 6.111  1999/03/23 21:38:19  madden
1205  * Add Join to BlastStopAwakeThread
1206  *
1207  * Revision 6.110  1999/03/19 17:03:29  egorov
1208  * Initialize global variable
1209  *
1210  * Revision 6.109  1999/03/16 15:52:25  vakatov
1211  * Got rid of extra comments-within-comments in the CVS Log section
1212  *
1213  * Revision 6.108  1999/03/16 02:49:31  beloslyu
1214  * typo fixed
1215  *
1216  * Revision 6.107  1999/03/15 22:06:01  madden
1217  * Changed cpu limit message
1218  *
1219  * Revision 6.106  1999/03/12 15:03:43  egorov
1220  * Add proper Int4-long type casting
1221  *
1222  * Revision 6.105  1999/03/04 14:18:08  egorov
1223  * Do correct filter masking when query is seqloc
1224  * Only the BlastMaskTheResidues() function is changed.
1225  *
1226  * Revision 6.104  1999/02/26 22:23:06  madden
1227  * Fixed bug when only one HSP allowed per area
1228  *
1229  * Revision 6.103  1999/02/25 17:40:48  madden
1230  * Check that proper sequence type is used in setup function
1231  *
1232  * Revision 6.102  1999/02/17 13:23:00  madden
1233  * Added hsp_num_max
1234  *
1235  * Revision 6.101  1999/02/11 13:52:59  madden
1236  * fixed memory leak
1237  *
1238  * Revision 6.100  1999/01/28 17:19:50  madden
1239  * Call BlastSeqLocFilterEx on reverse strand if plus strand NULL
1240  *
1241  * Revision 6.99  1999/01/28 16:04:25  madden
1242  * HspArrayPurge change, HeapSort of HSPs, efficiency in blastn wordfinder
1243  *
1244  * Revision 6.98  1999/01/26 17:55:50  madden
1245  * start set to last_db_seq
1246  *
1247  * Revision 6.97  1999/01/19 13:32:33  madden
1248  * Fix for final db sequence to search
1249  *
1250  * Revision 6.96  1998/12/31 18:17:02  madden
1251  * Added strand option
1252  *
1253  * Revision 6.95  1998/12/31 15:36:05  victorov
1254  * filtering internals is now based on SeqLoc instead of Bioseq
1255  *
1256  * Revision 6.94  1998/12/29 17:44:43  madden
1257  * Add BlastGetNonSumStatsEvalue, optimizations for NtWordFinder
1258  *
1259  * Revision 6.93  1998/12/18 16:19:57  madden
1260  * Make BLASTSetUpSearchWithReadDbInternal public, add BlastSearchBlkNewExtra
1261  *
1262  * Revision 6.92  1998/12/17 22:29:47  victorov
1263  * the way gifile is found has changed: now we look first in the
1264  * current directory then $BLASTDB and then in ncbirc
1265  *
1266  * Revision 6.91  1998/12/15 14:11:27  madden
1267  * Change to permit an arbitrary number of HSPs
1268  *
1269  * Revision 6.90  1998/11/27 15:44:58  madden
1270  * Ensure that gap_x_dropoff_final is at least as large as gap_x_dropoff.
1271  *
1272  * Revision 6.89  1998/11/23 13:36:07  madden
1273  * Check for non-NULL tick_callback before acquiring mutex
1274  *
1275  * Revision 6.88  1998/11/19 14:03:24  madden
1276  * Added comments, minor efficiency
1277  *
1278  * Revision 6.87  1998/10/13 20:37:51  madden
1279  * Use IS_residue after call to SeqPortGetResidue
1280  *
1281  * Revision 6.86  1998/09/24 15:26:34  egorov
1282  * Fix lint complaints
1283  *
1284  * Revision 6.85  1998/09/22 16:28:03  madden
1285  * Added call to lookup_position_aux_destruct
1286  *
1287  * Revision 6.84  1998/09/14 15:11:12  egorov
1288  * Add support for Int8 length databases; remove unused variables
1289  *
1290  * Revision 6.83  1998/09/04 14:45:39  madden
1291  * Moved code from blast.c to blastool.c
1292  *
1293  * Revision 6.82  1998/08/29 20:06:46  madden
1294  * Do not find words for pattern search
1295  *
1296  * Revision 6.81  1998/08/26 19:20:26  madden
1297  * Added SignalIgnore
1298  *
1299  * Revision 6.80  1998/08/13 20:00:20  egorov
1300  * Add check if gilist file exists on server
1301  *
1302  * Revision 6.79  1998/08/11 13:27:22  madden
1303  * Fix to small function for culling
1304  *
1305  * Revision 6.78  1998/08/05 13:08:16  madden
1306  * Removed obsolete global_rdfp
1307  *
1308  * Revision 6.77  1998/07/30 19:00:24  madden
1309  * Change to allow search of subset of database
1310  *
1311  * Revision 6.76  1998/07/28 21:17:45  madden
1312  * added do_not_reevaluate and mask_at_hash
1313  *
1314  * Revision 6.75  1998/07/25 14:26:39  madden
1315  * Added comments
1316  *
1317  * Revision 6.74  1998/07/22 20:31:25  madden
1318  * Added comments
1319  *
1320  * Revision 6.73  1998/07/22 12:16:23  madden
1321  * Added handle_results
1322  *
1323  * Revision 6.72  1998/07/21 20:58:01  madden
1324  * Changes to allow masking at hash only
1325  *
1326  * Revision 6.71  1998/07/17 15:39:53  madden
1327  * Changes for Effective search space.
1328  *
1329  * Revision 6.70  1998/07/14 20:14:37  egorov
1330  * Allow to specify gilist and gifile from client side
1331  *
1332  * Revision 6.69  1998/07/09 14:39:04  madden
1333  * Fix memory leak
1334  *
1335  * Revision 6.68  1998/07/02 21:00:36  egorov
1336  * Remove memory leak in threaded version
1337  *
1338  * Revision 6.67  1998/06/25 13:14:48  madden
1339  * check for NULL pointer in BlastPossibleDeleteWholeHeap
1340  *
1341  * Revision 6.66  1998/06/12 16:07:40  madden
1342  * Fixed typo
1343  *
1344  * Revision 6.65  1998/06/12 15:52:52  madden
1345  * Fixed warnings
1346  *
1347  * Revision 6.64  1998/06/02 21:21:18  madden
1348  * Changes for DNA matrices
1349  *
1350  * Revision 6.63  1998/06/02 13:10:14  madden
1351  * Fixed increment problem in for loop
1352  *
1353  * Revision 6.62  1998/05/28 19:58:48  madden
1354  * Zhengs new culling code
1355  *
1356  * Revision 6.61  1998/05/22 20:19:51  madden
1357  * Changes to fix multi-db search bug
1358  *
1359  * Revision 6.60  1998/05/17 16:28:39  madden
1360  * Allow changes to filter options and cc filtering.
1361  *
1362  * Revision 6.59  1998/05/05 14:05:32  madden
1363  * Added functions BlastStartAwakeThread and BlastStopAwakeThread
1364  *
1365  * Revision 6.58  1998/04/24 21:51:12  madden
1366  * Check return value on BlastScoreBlkFill
1367  *
1368  * Revision 6.57  1998/04/24 19:26:47  madden
1369  * Allocate ideal Karlin-Blk
1370  *
1371  * Revision 6.56  1998/04/15 20:23:47  madden
1372  * offset arg removed from BlastMaskTheResidues
1373  *
1374  * Revision 6.55  1998/04/01 22:46:55  madden
1375  * Set query_invalid flag when there is no valid sequence
1376  *
1377  * Revision 6.54  1998/03/27 01:39:08  madden
1378  * Check for non-zero subject length in link_hsps
1379  *
1380  * Revision 6.53  1998/03/25 22:26:46  madden
1381  * Use NlmThreadCreateEx
1382  *
1383  * Revision 6.52  1998/03/24 15:38:20  madden
1384  * Use BlastDoubleInt4Ptr to keep track of gis and ordinal_ids
1385  *
1386  * Revision 6.51  1998/03/19 22:16:18  madden
1387  * Changes to allow blasting by gi list
1388  *
1389  * Revision 6.50  1998/03/18 14:14:05  madden
1390  * Support random access by gi list
1391  *
1392  * Revision 6.49  1998/03/14 18:29:16  madden
1393  * Added BlastSeqIdListPtr
1394  *
1395  * Revision 6.48  1998/03/09 22:14:39  madden
1396  * Set seqid_list to NULL for child threads
1397  *
1398  * Revision 6.47  1998/02/27 14:34:26  madden
1399  * Added missing return value
1400  *
1401  * Revision 6.46  1998/02/26 22:35:00  madden
1402  * Added return value to link_hsp
1403  *
1404  * Revision 6.45  1998/02/26 19:08:07  madden
1405  *  Removed BlastNtFindWords BlastPopulateAllWordArrays BlastFindWords and BlastNewFindWords
1406  *
1407  * Revision 6.44  1998/02/26 16:56:02  madden
1408  * Fix for flyblast type searches
1409  *
1410  * Revision 6.43  1998/02/24 22:46:00  madden
1411  * Added option to shutdown culling
1412  *
1413  * Revision 6.42  1998/02/19 22:57:20  madden
1414  * Correctly set multiple_hits flag in BlastSetUpSearchEx
1415  *
1416  * Revision 6.41  1998/02/02 21:42:17  madden
1417  * link_hsps returns first BLAST_HSPPtr in list
1418  *
1419  * Revision 6.40  1998/01/31 21:33:49  madden
1420  * Fix to ensure hits are ranked properly
1421  *
1422  * Revision 6.39  1998/01/27 20:33:19  madden
1423  * Adjustments for query and db lengths
1424  *
1425  * Revision 6.38  1998/01/23 22:01:49  madden
1426  * Effective query length fixes for short sequences
1427  *
1428  * Revision 6.37  1998/01/15 19:30:31  madden
1429  * Protection against crashes for short sequences
1430  *
1431  * Revision 6.36  1998/01/09 22:30:06  madden
1432  * Fix for range-dependent BLAST with short sequences
1433  *
1434  * Revision 6.35  1998/01/07 23:04:25  madden
1435  * Added mutex for callbacks
1436  *
1437  * Revision 6.34  1998/01/06 18:25:24  madden
1438  * Save query_slp
1439  *
1440  * Revision 6.33  1998/01/05 22:37:34  madden
1441  * Check that options->multiple_hits_only is set before using multiple_hits
1442  *
1443  * Revision 6.32  1998/01/05 21:14:51  madden
1444  * Added protection against NULL LookupTablePtr and BLAST_WordFinderPtr
1445  *
1446  * Revision 6.31  1998/01/05 16:46:46  madden
1447  * One or both strands can be searched, as opposed to only both, changes to number of contexts
1448  *
1449  * Revision 6.30  1997/12/31 19:46:40  madden
1450  * Optimization of database scanning loop
1451  *
1452  * Revision 6.29  1997/12/31 17:50:42  madden
1453  * Added function BlastNtWordFinder_mh
1454  *
1455  * Revision 6.28  1997/12/29 16:15:01  madden
1456  * Optimizations for BlastNtWordFinder
1457  *
1458  * Revision 6.27  1997/12/24 19:42:57  madden
1459  * Fix for cell dependent blast
1460  *
1461  * Revision 6.26  1997/12/23 19:13:36  madden
1462  * Removed flags parameter from NlmThreadCreate
1463  *
1464  * Revision 6.25  1997/12/23 18:11:51  madden
1465  * Changes for range-dependent blast
1466  *
1467  * Revision 6.24  1997/12/17 19:25:36  madden
1468  * replace THR_BOUND with THREAD_BOUND
1469  *
1470  * Revision 6.23  1997/12/11 22:19:49  madden
1471  * Removed unused variables and function
1472  *
1473  * Revision 6.22  1997/12/10 22:40:28  madden
1474  * Floats used in call to blast_set_parameters, use of defines rather than strings
1475  *
1476  * Revision 6.21  1997/12/08 21:56:25  madden
1477  * Check for queries without valid sequences
1478  *
1479  * Revision 6.20  1997/12/04 21:49:05  madden
1480  * Check for NULL returned by BioseqLockById
1481  *
1482  * Revision 6.19  1997/11/07 21:38:40  madden
1483  * Check for virtual Bioseqs
1484  *
1485  * Revision 6.18  1997/10/30 15:40:55  madden
1486  * Casts and fixes for DEC alpha
1487  *
1488  * Revision 6.17  1997/10/24 19:09:14  madden
1489  * Removed BlastSetReadDB and BlastGetReadDB_ID, changed to ReadDBGetDb and ReadDBGetDbId
1490  *
1491  * Revision 6.16  1997/10/21 19:49:53  madden
1492  * Fix for no valid query sequence and hitlist_max of 1
1493  *
1494  * Revision 6.15  1997/10/06 17:57:49  madden
1495  * DB chunk size now done properly
1496  *
1497  * Revision 6.14  1997/09/29 17:19:30  madden
1498  * Checks for two threads using the same resource
1499  *
1500  * Revision 6.13  1997/09/25 13:44:56  madden
1501  * tblastn fix for multiple db searches
1502  *
1503  * Revision 6.12  1997/09/24 22:36:29  madden
1504  * Fixes for MT multidb searches
1505  *
1506  * Revision 6.11  1997/09/22 18:24:25  madden
1507  * Added ifdef for OS_UNIX_LINUX
1508  *
1509  * Revision 6.10  1997/09/22 17:36:18  madden
1510  * MACROS for position-specific matrices from Andy Neuwald
1511  *
1512  * Revision 6.9  1997/09/16 18:47:44  madden
1513  * ifdef for OS_UNIX_SUN
1514  *
1515  * Revision 6.8  1997/09/16 16:31:22  madden
1516  * More changes for multiple db runs
1517  *
1518  * Revision 6.7  1997/09/15 22:07:19  madden
1519  * Replacing ifdef RLIMIT_CPU with ifdef OS_UNIX
1520  *
1521  * Revision 6.6  1997/09/12 19:56:53  madden
1522  * Fix for multi-threaded runs
1523  *
1524  * Revision 6.5  1997/09/11 18:49:20  madden
1525  * Changes to enable searches against multiple databases.
1526  *
1527  * Revision 6.4  1997/09/10 23:10:53  kans
1528  * added ifdef RLIMIT_CPU for signal and headers
1529  *
1530  * Revision 6.3  1997/09/10 21:27:52  madden
1531  * Changes to set CPU limits
1532  *
1533  * Revision 6.2  1997/09/03 19:06:02  madden
1534  * Bug fix for effective HSP longer than query
1535  *
1536  * Revision 6.1  1997/08/27 14:46:43  madden
1537  * Changes to enable multiple DB searches
1538  *
1539  * Revision 6.0  1997/08/25 18:52:19  madden
1540  * Revision changed to 6.0
1541  *
1542  * Revision 1.227  1997/08/19 18:19:16  madden
1543  * Cast arg of log to Nlm_FloatHi
1544  *
1545  * Revision 1.226  1997/08/12 20:50:28  madden
1546  * Fixed case where two HSPs start at same query offset
1547  *
1548  * Revision 1.225  1997/07/29 17:07:01  madden
1549  * Fix for possible collision of two star threads
1550  *
1551  * Revision 1.224  1997/07/25 15:39:27  madden
1552  * Set correct query ID for filtering
1553  *
1554  * Revision 1.223  1997/07/24 21:08:31  madden
1555  * Take frame into account in sorting of hits for linking
1556  *
1557  * Revision 1.222  1997/07/22 17:17:23  madden
1558  * Added index callback
1559  *
1560  * Revision 1.221  1997/07/17 20:27:51  madden
1561  * Set choice to indicate frame when masking seqLoc is saved
1562  *
1563  * Revision 1.220  1997/07/16 20:35:11  madden
1564  * Call to BlastConvertProteinSeqLoc
1565  *
1566  * Revision 1.219  1997/07/16 18:51:55  madden
1567  * call to BioseqSeg, added static function BlastMakeTempProteinBioseq
1568  *
1569  * Revision 1.218  1997/07/15 20:37:05  madden
1570  * Calls to SeqLocSeg and BioseqSeg
1571  *
1572  * Revision 1.217  1997/07/14 20:11:03  madden
1573  * Removed unused variables
1574  *
1575  * Revision 1.216  1997/07/14 15:30:46  madden
1576  * Changed call to BlastKarlinBlkGappedCalc
1577  *
1578  * Revision 1.215  1997/07/11 19:28:23  madden
1579  * Added function BLASTSetUpSearchByLocWithReadDb
1580  *
1581  * Revision 1.214  1997/07/01 17:50:52  madden
1582  * used gapped Karlin-Altschul parameters when needed in LinkHsp
1583  *
1584  * Revision 1.213  1997/06/27 22:18:31  madden
1585  * MT fix for more threads than db seqs.
1586  *
1587  * Revision 1.212  1997/06/24 13:51:20  madden
1588  * Fixed SeqLoc leak
1589  *
1590  * Revision 1.211  1997/05/27 20:19:17  madden
1591  * Use of SeqLocDust rather than BioseqDust
1592  *
1593  * Revision 1.210  1997/05/22 21:24:46  madden
1594  * Added support for final gapX dropoff value
1595  *
1596  * Revision 1.209  1997/05/20 17:49:55  madden
1597  * Added functions BLASTSetUpSearchByLoc and BLASTSetUpSearchInternalByLoc
1598  *
1599  * Revision 1.208  1997/05/07 20:59:13  madden
1600  * Call to SeqId2OrdinalId replaces call to readdb_gi2seq
1601  *
1602  * Revision 1.207  1997/05/07 13:45:08  madden
1603  * Set mutex lock for ambiguity reevaluation, added use_large_gaps flag
1604  *
1605  * Revision 1.206  1997/05/01  21:08:26  madden
1606  * use ordinal index to rank results when they are statistically equivalent
1607  *
1608  * Revision 1.205  1997/05/01  15:53:07  madden
1609  * Addition of extra KarlinBlk's for psi-blast
1610  *
1611  * Revision 1.204  1997/04/25  13:57:43  madden
1612  * Fixed floating point exception by checking for zero query length value.
1613  *
1614  * Revision 1.203  1997/04/23  21:56:07  madden
1615  * Changes in BlastGetGappedAlignmentTraceback for in-frame gapping tblastn.
1616  *
1617  * Revision 1.202  1997/04/22  14:00:14  madden
1618  * Removed unused variables.
1619  *
1620  * Revision 1.201  1997/04/22  13:04:19  madden
1621  * Changes for in-frame blastx gapping.
1622  *
1623  * Revision 1.200  1997/04/17  22:07:48  madden
1624  * Changes to allow in-frame gapped tblastn.
1625  *
1626  * Revision 1.199  1997/04/09  20:01:53  madden
1627  * Added global_seqid's to allow only certain sequences in a db to be searched.
1628  *
1629  * Revision 1.198  1997/04/07  18:17:09  madden
1630  * Changed length_adjustment calculation.
1631  *
1632  * Revision 1.197  1997/04/04  15:30:37  madden
1633  * Removed extra fprint statement.
1634  *
1635  * Revision 1.196  1997/04/03  19:48:13  madden
1636  * Changes to use effective database length instead of the length of each
1637  * sequence in statistical calculations.
1638  *
1639  * Revision 1.195  1997/03/27  22:30:51  madden
1640  * Used gapped Karlin-Altschul parameters to calculate trigger for gapping.
1641  *
1642  * Revision 1.194  1997/03/20  22:09:52  madden
1643  * Used SeqIdFindBest to find GI in query.
1644  *
1645  * Revision 1.193  1997/03/20  19:57:40  madden
1646  * Changes to support segmented Bioseq queries.
1647  *
1648  * Revision 1.192  1997/03/14  22:06:11  madden
1649  * fixed MT bug in BlastReevaluateWithAmbiguities.
1650  *
1651  * Revision 1.191  1997/03/08  16:52:16  madden
1652  * Check in Reevaluate function to see if sequence is worth checking,
1653  * Added discontinuous option to ParameterBlk.
1654  *
1655  * Revision 1.190  1997/03/07  21:58:36  madden
1656  * Added Boolean gapped argument to BLASTOptionNew.
1657  *
1658  * Revision 1.189  1997/03/07  21:11:22  madden
1659  * Added in check for blastn on gapped calculations.
1660  *
1661  * Revision 1.188  1997/03/05  14:29:46  madden
1662  * Moved BlastSaveCurrentHsp to blastutl.c.
1663  *
1664  * Revision 1.187  1997/03/04  21:34:59  madden
1665  * Added in HspArrayPurge.
1666  *
1667  * Revision 1.186  1997/03/04  20:08:19  madden
1668  * Moved gapped alignment code from blast.c to blastutl.c
1669  *
1670  * Revision 1.185  1997/03/03  22:39:45  madden
1671  * Moved code from blast.c to blastutl.c.
1672  *
1673  * Revision 1.184  1997/03/03  21:47:22  madden
1674  * Moved functions from blast.c to blastutl.c for 16-bit windows.
1675  *
1676  * Revision 1.183  1997/03/03  20:58:09  madden
1677  * Fixed call to BlastGetGappedAlignmentTraceback; purged hitlist
1678  * for very short database sequences.
1679  *
1680  * Revision 1.182  1997/03/01  18:25:33  madden
1681  * reverse flag added to BlastGetGappedAlignmentTraceback functions.
1682  *
1683  * Revision 1.181  1997/02/24  16:40:38  madden
1684  * Change to GapXEditBlockToSeqAlign to use first SeqIdPtr, duplicate.
1685  *
1686  * Revision 1.180  1997/02/24  15:09:38  madden
1687  * Fixed bug where NULL pointer was dereferenced.
1688  *
1689  * Revision 1.179  1997/02/24  13:10:27  madden
1690  * Added function BlastGappedScoreInternal.
1691  *
1692  * Revision 1.178  1997/02/23  16:44:47  madden
1693  * GapAlignBlk became GapAlignBlkPtr and GapAlignBlkNew called.
1694  *
1695  * Revision 1.177  1997/02/20  21:50:24  madden
1696  * Added frame and translation information to GapAlignBlk, assigned it.
1697  *
1698  * Revision 1.176  1997/02/20  18:38:34  madden
1699  * Allowed theoretical database length to be set.
1700  *
1701  * Revision 1.175  1997/02/19  22:29:32  madden
1702  * Changes to handle multiple contexts in BlastGetGappedScore.
1703  *
1704  * Revision 1.174  1997/02/19  14:17:03  madden
1705  * GappedScore routines now work on all contexts.
1706  *
1707  * Revision 1.173  1997/02/17  17:39:54  madden
1708  * Changes to RealBlastGetGappedAlignmentTraceback for gapped blastn.
1709  *
1710  * Revision 1.172  1997/02/13  21:04:15  madden
1711  * fixed UMR.
1712  *
1713  * Revision 1.171  1997/02/12  22:19:08  madden
1714  * Added functions BlastNewWordExtend, BlastNewWordExtend_prelim, and
1715  * BlastNewFindWords for use in position based blast.
1716  *
1717  * Revision 1.170  1997/02/11  19:29:34  madden
1718  * Addition of BlastGetGappedScoreWithReaddb, removed dependence of
1719  * BlastGetGappedScore on readdb.
1720  *
1721  * Revision 1.169  1997/02/10  20:27:01  madden
1722  * Changed some CharPtr's into Uint1Ptr's.
1723  *
1724  * Revision 1.168  1997/02/10  20:14:23  madden
1725  * replaced doubles by Nlm_FloatHi's.
1726  *
1727  * Revision 1.167  1997/02/10  20:02:58  madden
1728  * Changed BlastSearchBlkNew to allow a set of words to be passed in.
1729  *
1730  * Revision 1.166  1997/02/10  15:24:59  madden
1731  * Set posMatrix element in gap_align structure.
1732  *
1733  * Revision 1.165  1997/02/07  22:43:03  madden
1734  * Moved BLAST_WordFinderNew and Destruct from blast.c to blastutl.c, made
1735  * non-static.
1736  *
1737  * Revision 1.164  1997/02/07  22:32:40  madden
1738  * Moved BlastGetSubjectId to blastutl.c, changed calling convention of
1739  * BlastGetSubjectId.
1740  *
1741  * Revision 1.163  1997/02/06  15:36:14  madden
1742  * Reuse 1st threshold if necessary.
1743  *
1744  * Revision 1.162  1997/02/06  14:27:15  madden
1745  * Addition of BlastAllWord structure.
1746  *
1747  * Revision 1.161  1997/02/05  19:54:59  madden
1748  * Changes for blastn gapped alignments.
1749  *
1750  * Revision 1.160  1997/02/04  22:12:59  madden
1751  * Added function RealBlastGetGappedAlignmentTraceback.
1752  *
1753  * Revision 1.159  1997/02/04  20:11:42  madden
1754  * Moved functions to blastutl.c
1755  *
1756  * Revision 1.158  1997/02/04  16:22:32  madden
1757  * Changes to enable gapped alignments on the reverse strand.
1758  *
1759  * Revision 1.157  1997/02/03  19:24:01  madden
1760  * Added function CheckGappedAlignmentsForOverlap.
1761  *
1762  * Revision 1.156  1997/02/03  17:19:03  madden
1763  * Increased number of bits for second pass if context factor > 1.
1764  *
1765  * Revision 1.155  1997/02/03  13:02:12  madden
1766  * Corrected SeqAlign offsets for minus strands.
1767  *
1768  * Revision 1.154  1997/01/31  22:42:51  madden
1769  * changed default thresholds and added strands to construction of SeqAligns.
1770  *
1771  * Revision 1.153  1997/01/31  22:13:02  madden
1772  * Adjusted bit score by logK.
1773  *
1774  * Revision 1.152  1997/01/31  14:45:27  madden
1775  * Added check for threshold value to ValidateOptions.
1776  *
1777  * Revision 1.151  1997/01/30  19:12:19  madden
1778  * Fixed memory leak.
1779  *
1780  * Revision 1.150  1997/01/28  22:38:56  madden
1781  * Added function BLASTOptionValidate.
1782  *
1783  * Revision 1.149  1997/01/28  21:50:05  madden
1784  * Adjustments to CopyResultHspToHSP.
1785  *
1786  * Revision 1.148  1997/01/24  16:51:44  madden
1787  * Fixed memory leak.
1788  *
1789  * Revision 1.147  1997/01/24  15:13:02  madden
1790  * Changes to accommodate gapped blastn.
1791  *
1792  * Revision 1.146  1997/01/22  17:45:08  madden
1793  * Added search to GetStartForGappedAlignment.
1794  *
1795  * Revision 1.145  1997/01/17  17:41:44  madden
1796  * Added flags for position based BLAST.
1797  *
1798  * Revision 1.144  1997/01/14  17:22:30  madden
1799  * Changes for MT, especially for small databases.
1800  *
1801  * Revision 1.143  1997/01/13  22:13:41  madden
1802  * set further_process to FALSE as needed.
1803  *
1804  * Revision 1.142  1997/01/13  20:06:36  madden
1805  * Added index_addition to strings before checking for ambiguities.
1806  *
1807  * Revision 1.141  1997/01/13  15:37:05  madden
1808  * Changed prototypes for star_callback and tick_callback.
1809  *
1810  * Revision 1.140  1997/01/11  18:58:29  madden
1811  * Removed defunct PerformBlastSearch... functions.
1812  *
1813  * Revision 1.139  1997/01/11  18:39:48  madden
1814  * Simplified ranged blast model.
1815  *
1816  * Revision 1.138  1997/01/11  18:22:10  madden
1817  * Changes to allow S2 to be set.
1818  *
1819  * Revision 1.137  1997/01/11  16:41:42  madden
1820  * Fix to tick_proc for MT runs.
1821  *
1822  * Revision 1.136  1997/01/09  17:44:35  madden
1823  * Added "bit_score" to BLASTResultHsp.
1824  *
1825  * Revision 1.135  1997/01/09  13:33:43  madden
1826  * Fixed NlmThreadCompare typo.
1827  *
1828  * Revision 1.134  1997/01/08  23:05:37  madden
1829  * Added call to TNlmThreadCompare.
1830  *
1831  * Revision 1.133  1997/01/07  20:40:29  madden
1832  * Added reverse Boolean to GetSeqAlignForResultHitList.
1833  *
1834  * Revision 1.132  1997/01/06  22:40:55  madden
1835  * Added function BlastGetSubjectId.
1836  *
1837  * Revision 1.131  1997/01/06  19:31:49  madden
1838  * Removed subject and query ID from GapAlignBlk.
1839  *
1840  * Revision 1.130  1997/01/06  17:22:59  madden
1841  * Used GapXEditScriptToSeqAlign to find SeqAlign.
1842  *
1843  * Revision 1.129  1997/01/04  20:41:11  madden
1844  * Shorter sequence is always the query in BlastTwoSequences.
1845  *
1846  * Revision 1.128  1997/01/03  20:29:32  madden
1847  * Corrected count of significant sequences.
1848  *
1849  * Revision 1.127  1997/01/03  19:03:35  madden
1850  * Fixed incorrect KarlinBlkPtr use.
1851  *
1852  * Revision 1.126  1997/01/03  17:26:50  madden
1853  * Fixed stats recordation.
1854  *
1855  * Revision 1.125  1996/12/30  21:45:28  madden
1856  * Added "strict" Boolean to CheckForRequiredRegion.
1857  *
1858  * Revision 1.124  1996/12/30  17:14:06  madden
1859  * Fixes for changes for "require a portion of the query sequence".
1860  *
1861  * Revision 1.123  1996/12/30  15:44:25  madden
1862  * Added capability to require a portion of the query sequence.
1863  *
1864  * Revision 1.122  1996/12/27  20:44:10  madden
1865  * Changes to require that part of the query be included.
1866  *
1867  * Revision 1.121  1996/12/23  22:02:05  madden
1868  * Changes to allow two sequences to be compared.
1869  *
1870  * Revision 1.120  1996/12/23  15:57:21  madden
1871  * Removed extra call to BlastPreliminaryGappedScore.
1872  * y
1873  *
1874  * Revision 1.119  1996/12/23  14:04:44  madden
1875  * Added gap_trigger.
1876  *
1877  * Revision 1.118  1996/12/20  21:11:40  madden
1878  * Changes to allow multiple hits runs only.
1879  *
1880  * Revision 1.117  1996/12/20  15:31:05  madden
1881  * Removed defunct function.
1882  *
1883  * Revision 1.116  1996/12/20  14:22:48  madden
1884  * Added discontinuous Boolean to GetSeqAlignForResultHitList.
1885  *
1886  * Revision 1.115  1996/12/18  14:33:13  madden
1887  * Checked for high score when E-values are equivalent.
1888  *
1889  * Revision 1.114  1996/12/17  18:28:10  madden
1890  * Changed score used to gap HSP's.
1891  *
1892  * Revision 1.113  1996/12/17  17:28:27  madden
1893  * Removed sleep function for non-UNIX platforms.
1894  *
1895  * Revision 1.112  1996/12/17  17:27:03  madden
1896  * Count number of attempted gappings.
1897  *
1898  * Revision 1.111  1996/12/17  13:47:57  madden
1899  * Added star_proc.
1900  *
1901  * Revision 1.110  1996/12/16  19:24:38  madden
1902  * Correction to initial wordsize for blastn.
1903  *
1904  * Revision 1.109  1996/12/16  18:24:21  madden
1905  * Corrected shift in BlastNtFindWords.
1906  *
1907  * Revision 1.108  1996/12/16  15:29:12  madden
1908  * Changed gapalign.h to gapxdrop.h
1909  *
1910  * Revision 1.107  1996/12/16  14:35:48  madden
1911  * Replaced BLAST_GAPPED_OPTION ifdef with gapped_calculation Boolean.
1912  *
1913  * Revision 1.106  1996/12/13  22:00:23  madden
1914  * Corrected starting point for gapped extension with traceback.
1915  *
1916  * Revision 1.105  1996/12/13  18:13:56  madden
1917  * Added tick callback functions
1918  *
1919  * Revision 1.104  1996/12/13  15:09:31  madden
1920  * Changes to parameters used for gapped extensions.
1921  *
1922  * Revision 1.103  1996/12/12  16:44:35  madden
1923  * Removed unused variables.
1924  *
1925  * Revision 1.102  1996/12/12  16:34:58  madden
1926  * GapAlignBlk replaces arguments in PerformGappedAlignment etc.
1927  *
1928  * Revision 1.101  1996/12/12  14:04:03  madden
1929  * Fixes for check on whether HSP is already contained by gapped alignment.
1930  *
1931  * Revision 1.100  1996/12/10  19:20:15  madden
1932  * Changed minimal HSP score for gapped alignments.
1933  *
1934  * Revision 1.99  1996/12/10  17:30:59  madden
1935  * Changed statistics for gapped blastp
1936  *
1937  * Revision 1.98  1996/12/09  23:24:05  madden
1938  * Added parameters to control which sequences get a gapped alignment.
1939  *
1940  * Revision 1.97  1996/12/09  20:45:47  madden
1941  * Adjustments to calculation of gapped HSP's.
1942  *
1943  * Revision 1.96  1996/12/08  15:19:59  madden
1944  * Added functions to enable gapped alignments.
1945  *
1946  * Revision 1.95  1996/11/27  22:46:08  madden
1947  * Removed includes that are no longer used.
1948  *
1949  * Revision 1.94  1996/11/27  22:25:09  madden
1950  * Corrected collection of statistics for MT runs.
1951  *
1952  * Revision 1.93  1996/11/27  21:52:30  madden
1953  * Added function FilterWithSeg.
1954  *
1955  * Revision 1.92  1996/11/26  19:53:46  madden
1956  * Checked for return value on BlastScoreBlkMatFill.
1957  *
1958  * Revision 1.91  1996/11/25  20:13:47  madden
1959  * Changed how NlmMutexInit is called.
1960  *
1961  * Revision 1.90  1996/11/25  19:51:41  madden
1962  * Fix for tblastx stats.
1963  *
1964  * Revision 1.89  1996/11/25  18:58:24  madden
1965  * Adjustments for translated database.
1966  *
1967  * Revision 1.88  1996/11/22  19:04:58  madden
1968  * Removed ifdef for OLD_BIT_ORDER; changed default values.
1969  *
1970  * Revision 1.87  1996/11/22  15:28:03  madden
1971  * Fixed problem of last query residue examined on a diagonal.
1972  *
1973  * Revision 1.86  1996/11/21  18:08:38  madden
1974  * Changed order of if-else statements in get_db_chunk for
1975  * possible improvement of parallelization.
1976  *
1977  * Revision 1.85  1996/11/20  23:15:50  madden
1978  * Changes to acquisition of Mutex in BlastSaveCurrentHitlist to
1979  * improve parallelization.
1980  *
1981  * Revision 1.84  1996/11/19  22:23:52  madden
1982  * Changed link_hsps to link HSP's faster.
1983  *
1984  * Revision 1.83  1996/11/18  19:32:09  madden
1985  * Removed unused variables found by CodeWarrior.
1986  *
1987  * Revision 1.82  1996/11/18  18:07:57  madden
1988  * Duplicated translation_buffer (for tblast[nx]).
1989  *
1990  * Revision 1.81  1996/11/18  17:28:13  madden
1991  * Duplicated translation information in BlastSearchBlkDuplicate and
1992  * also number of contexts.
1993  *
1994  * Revision 1.80  1996/11/18  15:45:40  madden
1995  * FilterDNA function to perform dusting added (by Sergei Shavirin).
1996  *
1997  * Revision 1.79  1996/11/15  17:54:54  madden
1998  * Added support for alternate genetic codes for blastx, tblast[nx].
1999  *
2000  * Revision 1.78  1996/11/14  16:37:58  madden
2001  * Put average lengths in defines.
2002  *
2003  * Revision 1.77  1996/11/14  16:21:55  madden
2004  * changed CharPtr to Uint1Ptr in GetTranslation.
2005  *
2006  * Revision 1.76  1996/11/13  22:35:18  madden
2007  * Added tblast[nx] capability to BlastReevaluateWithAmbiguities.
2008  *
2009  * Revision 1.75  1996/11/12  19:56:35  madden
2010  * Small gaps not considered for blastn.
2011  *
2012  * Revision 1.74  1996/11/12  16:21:17  madden
2013  * Added in context_factor.
2014  *
2015  * Revision 1.73  1996/11/12  13:46:15  madden
2016  * Removed defunct SetUpBlastSearch type functions.
2017  *
2018  * Revision 1.72  1996/11/11  17:44:21  madden
2019  * Fixed check for overlap in search.
2020  *
2021  * Revision 1.71  1996/11/09  21:02:59  madden
2022  * Fixes for blastn extensions.
2023  *
2024  * Revision 1.70  1996/11/08  21:45:03  madden
2025  * Fix for blastn extensions.
2026  *
2027  * Revision 1.69  1996/11/07  22:31:15  madden
2028  * Added function BlastReevaluateWithAmbiguities for nucl. db's.
2029  *
2030  * Revision 1.68  1996/11/07  17:31:26  madden
2031  * Fixed over-incrementing of index in link_hsps.
2032  *
2033  * Revision 1.67  1996/11/06  22:10:01  madden
2034  * Further optimization of BlastTranslateUnambiguousSequence.
2035  *
2036  * Revision 1.66  1996/11/05  23:19:08  madden
2037  * Rewrote BlastTranslateUnambiguousSequence so it's faster.
2038  *
2039  * Revision 1.65  1996/11/04  19:27:13  madden
2040  * Deallocated search->translation_buffer if allocated.
2041  *
2042  * Revision 1.64  1996/11/04  16:59:43  madden
2043  * Added function GetPrivatTranslationTable to optimize translation
2044  * of database.
2045  *
2046  * Revision 1.63  1996/11/01  21:06:49  madden
2047  * Corrected the (nucl.) database for the translated length for tblast[nx].
2048  *
2049  * Revision 1.62  1996/10/31  16:27:20  shavirin
2050  * Multiple changes due to reversal of residues in BLAST database
2051  * for nucleotide sequences from (4321) to (1234)
2052  * New dumper now required to create BLAST databases.
2053  *
2054  * Revision 1.61  1996/10/28  22:15:24  madden
2055  * Added check in BlastNtWordFinder that subject sequence is longer
2056  * than min. word size.
2057  *
2058  * Revision 1.60  1996/10/04  20:12:26  madden
2059  * Fixed memory leaks found by purify.
2060  *
2061  * Revision 1.59  1996/10/03  20:49:29  madden
2062  * Calculate standard Karlin parameters for blastx and tblastx,
2063  * Use proper Karlin parameters in linking of HSP's.
2064  *
2065  * Revision 1.58  1996/10/02  19:59:44  madden
2066  * Fixed translation of query in blastx, calculated different karlin parameters
2067  * for each frame.
2068  *
2069  * Revision 1.57  1996/10/01  21:24:02  madden
2070  * e2 value now depends on program, correct cutoffs for blastn.
2071  *
2072  * Revision 1.56  1996/10/01  18:49:06  madden
2073  * Properly placed counters for number of hits, extensions.
2074  *
2075  * Revision 1.55  1996/09/30  21:56:12  madden
2076  * Replaced query alphabet of ncbi2na with blastna alphabet.
2077  *
2078  * Revision 1.54  1996/09/26  21:48:29  madden
2079  * Set small/large gaps in SeqAlign.
2080  *
2081  * Revision 1.53  1996/09/26  20:18:43  madden
2082  * Addition of ExperimentalLocalBlastSearch function, fixes to SeqIdPtr's.
2083  *
2084  * Revision 1.52  1996/09/25  19:05:24  madden
2085  * Fixes to nucl. extension functions.
2086  *
2087  * Revision 1.51  1996/09/25  14:31:06  madden
2088  * Removed functions and statements for discontiguous word hits.
2089  *
2090  * Revision 1.50  1996/09/24  22:13:06  madden
2091  * BlastNtWordExtend now extends properly to end of query or subject.
2092  *
2093  * Revision 1.49  1996/09/24  18:39:51  madden
2094  * Changes to extend into the remainder of nucl. sequences (for blastn) and
2095  * to perform minus strand extensions.
2096  *
2097  * Revision 1.48  1996/09/20  21:58:14  madden
2098  * Changed CharPtr's to Uint1Ptr, got remainder length out of top order bits.
2099  *
2100  * Revision 1.47  1996/09/19  13:46:29  madden
2101  * Removed unused variables.
2102  *
2103  * Revision 1.46  1996/09/19  13:16:20  madden
2104  * Adjusted subject offset by READDB_COMPRESSION_RATIO for calc. of diagonal.
2105  *
2106  * Revision 1.45  1996/09/18  21:25:30  madden
2107  * Fixed bug in WordFinder for nucleotides.
2108  *
2109  * Revision 1.44  1996/09/18  13:39:24  madden
2110  * fixed offsets for SeqAligns on minus strands.
2111  *
2112  * Revision 1.43  1996/09/17  12:27:04  madden
2113  * Changes to perform correct extensions in blastn.
2114  *
2115  * Revision 1.42  1996/09/16  19:41:14  sad
2116  * Changed BlastTimeFillStructure() to use new functions from ncbitime.
2117  * That removes platform-dependent code from this function.
2118  *
2119  * Revision 1.41  1996/09/13 20:01:52  madden
2120  * put in READDB_UNPACK macros.
2121  *
2122  * Revision 1.40  1996/09/12  21:11:55  madden
2123  * Added extension functions for blastn
2124  *
2125  * Revision 1.39  1996/09/11  22:21:06  madden
2126  * Changes for blastn.
2127  *
2128  * Revision 1.38  1996/09/11  20:36:41  shavirin
2129  * Removed a few Windows NT compiler warnings
2130  *
2131  * Revision 1.35  1996/09/11  19:14:09  madden
2132  * Added BLAST_OptionsBlkPtr structure and use thereof.
2133  *
2134  * Revision 1.34  1996/09/10  19:40:35  madden
2135  * Added functions to perform blastn comparison.
2136  *
2137  * Revision 1.33  1996/09/05  19:39:52  madden
2138  * Added "word_width" to position already covered on diagonal.
2139  *
2140  * Revision 1.32  1996/09/05  19:26:16  madden
2141  * Combined masking and shifting, removed some checks if prelim.
2142  *
2143  * Revision 1.31  1996/09/05  14:12:19  madden
2144  * New (faster) type of extension.
2145  *
2146  * Revision 1.30  1996/09/03  16:27:21  madden
2147  * Added efficiency in scanning of database.
2148  *
2149  * Revision 1.29  1996/08/30  19:27:37  madden
2150  * Fix for one-pass blast, memory-mapped file was being freed.
2151  *
2152  * Revision 1.28  1996/08/30  18:23:50  madden
2153  * A few efficiencies and a correction for one-pass blast.
2154  *
2155  * Revision 1.27  1996/08/30  15:17:57  madden
2156  * Minor efficiency in BlastReapHitlistByEvalue.
2157  *
2158  * Revision 1.25  1996/08/28  20:07:36  madden
2159  * Fix for UMR when the (nucl) sequence is exactly div. by four.
2160  *
2161  * Revision 1.24  1996/08/28  17:11:07  madden
2162  * Fixes for the translation of (nucl.) database sequences.
2163  *
2164  * Revision 1.23  1996/08/27  21:51:44  madden
2165  * Changes for tblastx
2166  *
2167  * Revision 1.22  1996/08/27  17:47:37  madden
2168  * current_hitlist purged on second pass for tblastn.
2169  *
2170  * Revision 1.21  1996/08/26  17:20:20  shavirin
2171  * Added support for WIN32 in function BlastTimeFillStructure()
2172  *
2173  * Revision 1.20  1996/08/23  18:50:23  madden
2174  * Adjusted some of the NT warning fixes to give correct results.
2175  *
2176  * Revision 1.19  1996/08/23  16:52:07  madden
2177  * Changed Int1 to Int4 in SetUpBlastSearchInternal.
2178  *
2179  * Revision 1.18  1996/08/23  16:39:02  madden
2180  * Fixed problem with SaveCurrentHsp.
2181  *
2182  * Revision 1.17  1996/08/23  15:29:44  shavirin
2183  * Fixed a lot of NT compiler warnings about type mismatch
2184  *
2185  * Revision 1.16  1996/08/21  21:37:01  madden
2186  * Added casts to silence compiler warnings.
2187  *
2188  * Revision 1.15  1996/08/21  21:24:56  madden
2189  * Changes for tblastn.
2190  *
2191  * Revision 1.14  1996/08/21  12:55:54  madden
2192  * Changed "purge" frame.
2193  *
2194  * Revision 1.13  1996/08/15  17:07:57  madden
2195  * Added efficiencies in loop that scans database.
2196  *
2197  * Revision 1.12  1996/08/14  20:01:30  madden
2198  * Efficiencies suggested by Zheng Zhang.
2199  *
2200  * Revision 1.11  1996/08/14  18:15:31  madden
2201  * Query frame moved from context to BlastSeqBlk.
2202  *
2203  * Revision 1.10  1996/08/14  17:19:29  madden
2204  * Correctly set frame for subject.
2205  *
2206  * Revision 1.9  1996/08/14  15:20:37  madden
2207  * Added Blast prefix to TranslateUnambiguousSequence function name.
2208  *
2209  * Revision 1.8  1996/08/14  14:30:42  madden
2210  * Cleaned up problem with UMR in TranslateUnambiguousSequence.
2211  *
2212  * Revision 1.7  1996/08/13  22:04:36  madden
2213  * Fixed TranslateUnambiguousSequence to properly read a nucl. db.
2214  *
2215  * Revision 1.6  1996/08/13  15:26:29  madden
2216  * Changes for tblastn.
2217  *
2218  * Revision 1.5  1996/08/09  22:11:12  madden
2219  * Added original_sequence to BlastSequenceAddSequence.
2220  *
2221  * Revision 1.4  1996/08/08  21:39:00  madden
2222  * Added some functions for tblastn.
2223  *
2224  * Revision 1.3  1996/08/07  14:23:45  madden
2225  * Added functions to produce SeqAlign from BLAST results.
2226  *
2227  * Revision 1.2  1996/08/06  16:07:31  madden
2228  * Removed unused functions Bsp2BLAST0Request.
2229  *
2230  * Revision 1.1  1996/08/05  19:45:46  madden
2231  * Initial revision
2232  *
2233  * Revision 1.118  1996/08/05  13:56:44  madden
2234  * Check if threads are available with NlmThreadsAvailable.
2235  *
2236  * Revision 1.117  1996/08/02  14:20:06  madden
2237  * Changes in call to readdb.
2238  *
2239  * Revision 1.116  1996/07/31  13:46:23  madden
2240  * Each thread gets own copy of ewp_params in SearchBlk.
2241  *
2242  * Revision 1.115  1996/07/31  13:09:17  madden
2243  * Changes for threaded blast.
2244  *
2245  * Revision 1.114  1996/07/25  20:45:20  madden
2246  * Change to calling convention of PerformBlastSearchWithReadDb.
2247  *
2248  * Revision 1.113  1996/07/25  12:55:20  madden
2249  * readdb_get_sequence call changed to allow for systems w/o mmap.
2250  *
2251  * Revision 1.112  1996/07/24  13:16:28  madden
2252  * Removed commented out fprintf.
2253  *
2254  * Revision 1.111  1996/07/24  12:00:07  madden
2255  * Changes for blastx.
2256  *
2257  * Revision 1.110  1996/07/18  22:00:02  madden
2258  * Changes for multiple contexts.
2259  *
2260  * Revision 1.109  1996/07/18  13:35:51  madden
2261  * Addition of the BLASTContextStructPtr.
2262  *
2263  * Revision 1.108  1996/07/16  15:01:02  madden
2264  * Cleaned up link_hsp function.
2265  *
2266  * Revision 1.107  1996/07/16  14:37:42  madden
2267  * Changes to link_hsp's so another array is not needed for the HSP's.
2268  *
2269  * Revision 1.106  1996/07/11  16:03:58  madden
2270  * SaveCurrentHitlist keeps track of which set an HSP belongs to.
2271  *
2272  * Revision 1.105  1996/07/05  17:16:34  madden
2273  * Optimized loop in contiguous word finder.
2274  *
2275  * Revision 1.104  1996/07/03  14:26:05  madden
2276  * Added test extension function.
2277  *
2278  * Revision 1.103  1996/07/02  14:32:53  madden
2279  * Added hspcnt_max.
2280  *
2281  * Revision 1.102  1996/07/02  12:04:15  madden
2282  * HSP's saved on array, rather than linked list.
2283  *
2284  * Revision 1.101  1996/07/01  15:30:06  madden
2285  * Don't NULL out hit if extension to left does not succeed.
2286  *
2287  * Revision 1.100  1996/06/27  18:41:39  madden
2288  * Changes to cutoff score to start second pass.
2289  *
2290  * Revision 1.99  1996/06/26  19:38:12  madden
2291  * Don't continue extension on 1st pass if the first (left) extension
2292  * doesn't reach to the first hit.
2293  *
2294  * Revision 1.98  1996/06/26  15:53:54  madden
2295  * Second dropoff score parameter added.
2296  *
2297  * Revision 1.97  1996/06/26  14:30:25  madden
2298  * Removed unused variables.
2299  *
2300  * Revision 1.96  1996/06/26  14:09:16  madden
2301  * Added comments and indents to loops.
2302  *
2303  * Revision 1.95  1996/06/26  13:29:50  madden
2304  * Changes to reduce the amount of memory and time of BlastFindWords.
2305  *
2306  * Revision 1.94  1996/06/24  20:26:46  madden
2307  * Dropoff ("X") set to first or second dropoff parameter.
2308  *
2309  * Revision 1.93  1996/06/24  17:57:09  madden
2310  * Added wordFinders to test dropoff scores.
2311  *
2312  * Revision 1.92  1996/06/20  16:51:17  madden
2313  * Removed unused parameters.
2314  *
2315  * Revision 1.91  1996/06/20  16:15:57  madden
2316  * Replaced int's with Int4's.
2317  *
2318  * Revision 1.90  1996/06/19  14:18:33  madden
2319  * Addition of SetUpBlastSearchInternal function.
2320  *
2321  * Revision 1.89  1996/06/17  19:02:13  madden
2322  * Removed unused MP code.
2323  *
2324  * Revision 1.88  1996/06/17  18:23:31  madden
2325  * Removed unused functions.
2326  *
2327  * Revision 1.87  1996/06/14  17:58:13  madden
2328  * Changes to avoid nulling out arrays for every sequence.
2329  *
2330  * Revision 1.86  1996/06/13  21:16:33  madden
2331  * database length removed from BLAST_ExtendWordNew.
2332  *
2333  * Revision 1.85  1996/06/13  21:04:17  madden
2334  * Added efficiencies to word finders.
2335  *
2336  * Revision 1.84  1996/06/11  18:13:54  madden
2337  * Removed unused variables.
2338  *
2339  * Revision 1.83  1996/06/11  17:58:31  madden
2340  * Changes to allow shorter arrays for multiple hits type blast.
2341  *
2342  * Revision 1.82  1996/06/10  16:52:16  madden
2343  * Use bit-shifting and masking instead of dividing and remainder.
2344  *
2345  * Revision 1.81  1996/06/10  13:44:07  madden
2346  * Changes to reduce the size of the "already visited" array.
2347  *
2348  * Revision 1.80  1996/06/06  17:54:09  madden
2349  * number_of_bits added to SetUpBlastSearch and SetUpBlastSearchWithReadDb.
2350  *
2351  * Revision 1.79  1996/06/06  14:09:22  madden
2352  * Removed defunct function BlastNWSThreshold, blast_set_parameters became
2353  * static.
2354  *
2355  * Revision 1.78  1996/06/06  13:54:51  madden
2356  * Removed defunct function BLAST_ParameterBlkFill
2357  *
2358  * Revision 1.77  1996/06/06  13:23:17  madden
2359  * CalculateSecondCutoffs only called for second pass.
2360  *
2361  * Revision 1.76  1996/06/04  15:32:53  madden
2362  * Changed counting of first and second pass hits.
2363  *
2364  * Revision 1.75  1996/06/04  13:50:28  madden
2365  * Purge HitList, rather than deleting it.
2366  *
2367  * Revision 1.74  1996/05/29  17:21:07  madden
2368  * Removed defunct BlastFixEandPValues function, replaced one call
2369  * to BlastSequenceAddSequence.
2370  *
2371  * Revision 1.73  1996/05/29  12:43:25  madden
2372  * Function BlastTimeFillStructure added to keep track of time.
2373  *
2374  * Revision 1.72  1996/05/28  14:12:53  madden
2375  * Added code to collect statistics.
2376  *
2377  * Revision 1.71  1996/05/23  21:55:04  madden
2378  * Removed unused variable initlen
2379  *
2380  * Revision 1.70  1996/05/22  20:19:22  madden
2381  * Removed unused variables, fixed codecenter nits.
2382  *
2383  * Revision 1.68  1996/05/20  21:17:49  madden
2384  * Changed (incorrect) NULL's to zero's.
2385  *
2386  * Revision 1.67  1996/05/16  19:50:15  madden
2387  * Added documentation block.
2388  *
2389  * Revision 1.66  1996/05/16  13:28:24  madden
2390  * Both 1st and 2nd pass can separately be contiguous or discontiguous.
2391  *
2392  * Revision 1.64  1996/05/14  19:51:37  madden
2393  * Added some register variables.
2394  *
2395  * Revision 1.63  1996/05/14  18:56:53  madden
2396  * Unrolled some loops in extension function.
2397  *
2398  * Revision 1.62  1996/05/14  16:15:59  madden
2399  * Fixes to SaveCurrentHitlist
2400  *
2401  * Revision 1.61  1996/05/10  18:19:20  madden
2402  * Made lookup_pos a register variable.
2403  *
2404  * Revision 1.59  1996/05/09  13:14:56  madden
2405  * Consolidated CalculateEffectiveLengths and BlastReapHSPsByEvalue into other
2406  * functions.
2407  *
2408  * Revision 1.58  1996/05/03  19:54:24  madden
2409  * Removed defunct seqalign functions, optimized BlastWordFinder functions.
2410  *
2411  * Revision 1.57  1996/05/01  14:57:37  madden
2412  * Added BlastResults structures.
2413  *
2414  * Revision 1.56  1996/04/24  19:46:34  madden
2415  * Removed q_rightmost and q_leftmost from the extend function.
2416  *
2417  * Revision 1.55  1996/04/24  18:01:11  madden
2418  * Used call to readdb_get_max_length for first call to BLAST_ExtendWordNew.
2419  *
2420  * Revision 1.54  1996/04/24  16:16:58  madden
2421  * Changed LinkHsp's not to reallocate the hsp array every time.
2422  *
2423  * Revision 1.53  1996/04/24  12:51:15  madden
2424  * deleted function BlastSequenceAddSequenceIdToSequenceBlk.
2425  *
2426  * Revision 1.52  1996/04/22  21:39:31  madden
2427  * New calls to readdb_get_sequence.
2428  *
2429  * Revision 1.51  1996/04/18  13:39:33  madden
2430  * demodularized lookup of initial hits.
2431  *
2432  * Revision 1.50  1996/04/16  15:32:47  madden
2433  * economies added to new extension functions, non-scoring identical
2434  * words not added to lookup tables.
2435  *
2436  * Revision 1.48  1996/04/11  14:29:33  madden
2437  * function BlastWordExtend completely rewritten.
2438  *
2439  * Revision 1.47  1996/04/04  20:46:22  madden
2440  * Optimized extension function; made "lookup_find" a FnPtr.
2441  *
2442  * Revision 1.46  1996/04/03  19:13:04  madden
2443  * added functions PerformBlastSearchWithReadDb and Perform2PassBlastSearchWithReadDb.
2444  *
2445  * Revision 1.45  1996/03/29  21:26:01  madden
2446  * "hitlist" now kept on SeqAlign rather than HitList.
2447  *
2448  * Revision 1.44  1996/03/29  14:08:18  madden
2449  * SetUpBlastSearchWithReadDb added.
2450  *
2451  * Revision 1.43  1996/03/28  18:45:45  madden
2452  * sequence now added to hitlist after significance has been established.
2453  *
2454  * Revision 1.42  1996/03/27  23:51:11  madden
2455  * added function AddDescriptorsToHitlistWithReadDb.
2456  *
2457  * Revision 1.41  1996/03/27  23:19:24  madden
2458  * Added PerformBlastSearchWithReadDb and Perform2PassBlastSearchWithReadDb,
2459  * changed parameters for PerformBlastSearch and Perform2PassBlastSearch.
2460  *
2461  * Revision 1.40  1996/03/27  19:51:09  madden
2462  * current hits now saved on "current_hitlist", not saved to main
2463  * hitlist until significance decided upon.
2464  *
2465  * Revision 1.39  1996/03/26  19:36:15  madden
2466  * Changes to read databases formatted with formatdb.
2467  *
2468  * Revision 1.38  1996/03/25  16:34:19  madden
2469  * Changes to mimic old statistics.
2470  *
2471  * Revision 1.37  1996/03/20  14:28:57  madden
2472  * Changed cutoff values.
2473  *
2474  * Revision 1.36  1996/03/11  13:52:52  madden
2475  * Ignore gaps when the sequences are too short.
2476  *
2477  * Revision 1.35  1996/02/28  21:36:54  madden
2478  * changes for discontiguous words.
2479  *
2480  * Revision 1.34  1996/02/15  23:31:19  madden
2481  * Trimmed ends of HSP's in comparison with gap.
2482  *
2483  * Revision 1.33  1996/02/15  23:19:43  madden
2484  * Changed call to BlastScoreBlkFill
2485  *
2486  * Revision 1.32  1996/02/15  15:22:52  madden
2487  * Trimming of sequence ends for linking.
2488  *
2489  * Revision 1.31  1996/02/13  14:05:57  madden
2490  * changes to ensure that closer to optimal HSP's are found.
2491  *
2492  * Revision 1.30  1996/02/09  13:50:09  madden
2493  * Added BlastReapHSPsByEvalue; changes to allow both one and two pass runs.
2494  *
2495  * Revision 1.29  1996/02/06  22:50:56  madden
2496  * Changes for two-pass runs.
2497  *
2498  * Revision 1.28  1996/02/05  18:46:09  madden
2499  * Added support for two threshold values.
2500  *
2501  * Revision 1.27  1996/02/02  19:24:53  madden
2502  * Added wfp_first and wfp_second for first and second pass.
2503  *
2504  * Revision 1.26  1996/01/31  17:33:54  madden
2505  * Added function BlastReapHitlistByEvalue.
2506  *
2507  * Revision 1.25  1996/01/29  21:11:38  madden
2508  * Changes for MultipleHits BLAST.
2509  *
2510  * Revision 1.24  1996/01/23  16:30:52  madden
2511  * e_cutoff changed from BLAST_Score to double in SetUpBlastSearch.
2512  *
2513  * Revision 1.23  1996/01/22  22:31:01  madden
2514  * Fixed BlastFindWords to increment index1 correctly.
2515  *
2516  * Revision 1.22  1996/01/22  22:05:05  madden
2517  * Set initial e2 to 0.5.
2518  *
2519  * Revision 1.20  1996/01/17  16:59:56  madden
2520  * Added gap arguments to SetUpBlastSearch.
2521  *
2522  * Revision 1.19  1996/01/17  13:45:03  madden
2523  * Added function BlastFixEandPValues.
2524  *
2525  * Revision 1.18  1996/01/16  15:28:05  madden
2526  * Set i_am_multitasking flag.
2527  *
2528  * Revision 1.16  1996/01/10  17:50:21  madden
2529  * sort hitlist by pvalue.
2530  *
2531  * Revision 1.15  1996/01/08  23:23:22  madden
2532  * Fixed neighborhood bug, added some MP stuff
2533  *
2534  * Revision 1.14  1996/01/06  18:56:52  madden
2535  * Removed obsolete code, fixed purify nit.
2536  *
2537  * Revision 1.13  1996/01/06  17:50:20  madden
2538  * Fixed HeapSort functions for linking of HSP's.
2539  *
2540  * Revision 1.12  1996/01/06  17:18:42  madden
2541  * Fixed setting of "next" pointers when the HSp is part of a linked set.
2542  *
2543  * Revision 1.11  1996/01/06  16:29:38  madden
2544  * NULL'ed out some "link" pointers.
2545  *
2546  * Revision 1.10  1996/01/05  22:54:18  madden
2547  * Fixed HeapSort calls in linking routines.
2548  *
2549  * Revision 1.9  1996/01/05  15:51:14  madden
2550  * Added Stephen Altschul's link_hsps.
2551  *
2552  * Revision 1.8  1995/12/30  19:21:01  madden
2553  * Added PerformBlastSearch.
2554  *
2555  * Revision 1.7  1995/12/30  18:38:51  madden
2556  * Added function SetUpBlastSearch.
2557  *
2558  * Revision 1.6  1995/12/28  21:22:19  madden
2559  * Deallocated leaking memory.
2560  *
2561  * Revision 1.5  1995/12/26  23:03:22  madden
2562  * Added in functions to automatically set some parameters.
2563  *
2564  * Revision 1.4  1995/12/26  20:27:11  madden
2565  * simplified hit extension routine.
2566  *
2567  * Revision 1.3  1995/12/21  23:09:57  madden
2568  * BLAST_Score functions moved to blastkar.c
2569  *
2570  * */
2571 
2572 #include <ncbi.h>
2573 #include <blastpri.h>
2574 #include <lookup.h>
2575 #include <objcode.h>
2576 #include <objseq.h>
2577 #include <sequtil.h>
2578 #include <tofasta.h>
2579 #include <seqport.h>
2580 #include <readdb.h>
2581 #include <ncbithr.h>
2582 #include <gapxdrop.h>
2583 #include <dust.h>
2584 
2585 #include <mbalign.h>
2586 #include <mblast.h>
2587 
2588 /*
2589 The last database sequence a tick (progress indicator) was issued for
2590 and the increments (i.e., number of db sequences completed) that a tick
2591 should be emitted.
2592 */
2593 /* Int4	last_db_seq=0, db_incr=0; */
2594 
2595 /*
2596 	Set to TRUE if the process has timed out.
2597 */
2598 volatile Boolean time_out_boolean;
2599 
2600 /*
2601 	SeqId lists if only a certain number of the database sequences will be
2602 	used for the search.
2603 */
2604 /* SeqIdPtr global_seqid_list=NULL, global_seqid_ptr; */
2605 
2606 /*
2607 	GI List to be used if database will be searched by GI.
2608 	current is the current element in the array being worked on.
2609 	global_gi_being_used specifies that it will be used.
2610 */
2611 
2612 /* Int4 global_gi_current=0;
2613    Boolean global_gi_being_used=FALSE; */
2614 
2615 /* Function to emit progress messages, set by user. */
2616 /* int (LIBCALLBACK *tick_callback)PROTO((Int4 done, Int4 positives)); */
2617 
2618 /* int (LIBCALLBACK *star_callback)PROTO((Int4 done, Int4 positives));
2619    int (LIBCALLBACK *index_callback)PROTO((Int4 done, Int4 positives)); */
2620 
2621 /* tells star_proc to check that a star should be emitted. */
2622 /* TNlmThread awake_thr=NULL;
2623    Boolean awake; */
2624 
2625 /* tells index_proc to check that a message should be emitted. */
2626 /* TNlmThread index_thr=NULL;
2627    Boolean awake_index; */
2628 
2629 /* period of sending out a star/message. */
2630 /* #define PERIOD 60 */
2631 
2632 /* Use by star_proc to determine whether to emit a star. */
2633 /* time_t last_tick=0; */
2634 
2635 /* How many positive hits were found (set by ReapHitlist, read by tick_proc
2636 and star_proc). */
2637 /* Int4 number_of_pos_hits=0; */
2638 
2639 /* Mutex for assignment of db seqs to search. */
2640 /* TNlmMutex db_mutex=NULL; */
2641 
2642 /* Mutex for insertion of results into list. */
2643 /* TNlmMutex results_mutex = NULL; */
2644 /* Mutex for the callbacks (star_proc, tick_proc, index_proc). */
2645 /* TNlmMutex callback_mutex=NULL; */
2646 
2647 /* The last db sequence to be assigned.  Used only in get_db_chunk after
2648 the acquisition of the "db_mutex" (above). */
2649 /* Int4 db_chunk_last=0; */
2650 
2651 /* the last sequence in the database to be compared against. */
2652 /* Int4 final_db_seq; */
2653 
2654 /* Default size of the chunks that are assigned in the function get_db_chunk. */
2655 /* Actually db_chunk_size is used, which is smaller if the db is smaller. */
2656 
/* Forward declarations of file-local (static) functions. */
2657 static Int4 BlastExtendWordSearch PROTO((BlastSearchBlkPtr search, Boolean multiple_hits));
2658 
/* The four word-extension routines below share one parameter list. */
2659 static Int2 BlastWordExtend PROTO((BlastSearchBlkPtr search, Int4 q_off, Int4 s_off, Int4 word_width, BLAST_Diag diag, BLAST_Diag real_diag, Boolean PNTR succeed_to_right, Int2 context));
2660 
2661 /*AAS*/
2662 static Int2 BlastNewWordExtend PROTO((BlastSearchBlkPtr search, Int4 q_off, Int4 s_off, Int4 word_width, BLAST_Diag diag, BLAST_Diag real_diag, Boolean PNTR succeed_to_right, Int2 context));
2663 
2664 static Int2 BlastWordExtend_prelim PROTO((BlastSearchBlkPtr search, Int4 q_off, Int4 s_off, Int4 word_width, BLAST_Diag diag, BLAST_Diag real_diag, Boolean PNTR succeed_to_right, Int2 context));
2665 
2666 /*AAS*/
2667 static Int2 BlastNewWordExtend_prelim PROTO((BlastSearchBlkPtr search, Int4 q_off, Int4 s_off, Int4 word_width, BLAST_Diag diag, BLAST_Diag real_diag, Boolean PNTR succeed_to_right, Int2 context));
2668 
2669 
/* Word finders: each takes the search block; some also take a lookup table. */
2670 static Int4 BlastWordFinder PROTO((BlastSearchBlkPtr search));
2671 static Int4 BlastWordFinder_mh PROTO((BlastSearchBlkPtr search));
2672 static Int4 BlastWordFinder_contig PROTO((BlastSearchBlkPtr search, LookupTablePtr lookup));
2673 static Int4 BlastWordFinder_mh_contig PROTO((BlastSearchBlkPtr search, LookupTablePtr lookup));
2674 
/* HSP linking; returns a BLAST_HSPPtr. */
2675 static BLAST_HSPPtr link_hsps PROTO((BlastSearchBlkPtr search, BLAST_HitListPtr hitlist, BLAST_HSPPtr PNTR hsp_array));
2676 
2677 static Int4 BlastNtWordFinder PROTO((BlastSearchBlkPtr search, LookupTablePtr lookup));
2678 static Int4 BlastNtWordFinder_mh PROTO((BlastSearchBlkPtr search, LookupTablePtr lookup));
2679 
2680 /* DEBUGGING stuff */
2681 #ifdef BLAST_TIMER
2682 clock_t	last_clock = 0;
2683 #endif
2684 /* end DEBUGGING stuff */
2685 
2686 
2687 /*
2688 	The function that decides whether or not a tick should be
2689 	emitted.  This is performed through the callback function
2690 	("tick_callback") that is set in "do_the_blast_run".  This
2691 	function is called from "do_blast_search" for single processing
2692 	machines and "get_db_chunk" for MT machines, after the db_mutex
2693 	has been obtained in "get_db_chunk".
2694 */
2695 
BlastTickProc(Int4 sequence_number,BlastThrInfoPtr thr_info)2696 void BlastTickProc(Int4 sequence_number, BlastThrInfoPtr thr_info)
2697 
2698 {
2699     if(thr_info->tick_callback &&
2700        (sequence_number > (thr_info->last_db_seq + thr_info->db_incr))) {
2701         NlmMutexLockEx(&thr_info->callback_mutex);
2702         thr_info->last_db_seq += thr_info->db_incr;
2703         thr_info->tick_callback(sequence_number, thr_info->number_of_pos_hits);
2704         thr_info->last_tick = Nlm_GetSecs();
2705         NlmMutexUnlock(thr_info->callback_mutex);
2706     }
2707     return;
2708 }
2709 
2710 /*
2711 	Sends out a message every PERIOD (i.e., 60 secs.) for the index.
2712 
2713 	THis function runs as a separate thread and only runs on a threaded
2714 	platform.
2715 */
2716 VoidPtr
index_proc(VoidPtr dummy)2717 index_proc(VoidPtr dummy)
2718 
2719 {
2720 
2721     /* Sleep only works on UNIX.  An ifdef is used until
2722        a portable solution can be found. */
2723 #ifdef OS_UNIX
2724 
2725     Int2 index;
2726     BlastThrInfoPtr thr_info = (BlastThrInfoPtr) dummy;
2727 
2728     while (thr_info->awake_index) {
2729         for (index=0; index < STAR_MSG_PERIOD; index++) {
2730             sleep(1);
2731             if (thr_info->awake_index == FALSE)
2732                 break;
2733         }
2734 
2735         if (thr_info->awake_index && thr_info->index_callback) {
2736             NlmMutexLockEx(&thr_info->callback_mutex);
2737             thr_info->last_tick = Nlm_GetSecs();
2738             thr_info->index_callback(0, 0);
2739             NlmMutexUnlock(thr_info->callback_mutex);
2740         }
2741     }
2742 #endif
2743     return dummy;
2744 }
2745 
2746 /*
2747 	Sends out a message every PERIOD (i.e., 60 secs.) and sends out a
2748 	"star" if a tick has not been sent out in the last PERIOD.
2749 
2750 	THis function runs as a separate thread and only runs on a threaded
2751 	platform.
2752 */
2753 static VoidPtr
star_proc(VoidPtr dummy)2754 star_proc(VoidPtr dummy)
2755 
2756 {
2757     /* Sleep only works on UNIX.  An ifdef is used until
2758        a portable solution can be found. */
2759 #ifdef OS_UNIX
2760 
2761     time_t now;
2762     Int2 index;
2763     BlastThrInfoPtr thr_info = (BlastThrInfoPtr) dummy;
2764 
2765     now = Nlm_GetSecs();
2766     while (thr_info->awake) {
2767         if (now - thr_info->last_tick < STAR_MSG_PERIOD / 2) {
2768             for (index = 0; index < STAR_MSG_PERIOD; index++) {
2769                 sleep(1);
2770                 if (thr_info->awake == FALSE)
2771                     break;
2772             }
2773         }
2774         if (thr_info->awake) {
2775             NlmMutexLockEx(&thr_info->callback_mutex);
2776             now = Nlm_GetSecs();
2777             if (now-thr_info->last_tick > STAR_MSG_PERIOD) {
2778                 if (thr_info->star_callback) {
2779                     thr_info->star_callback(thr_info->db_chunk_last,
2780                                             thr_info->number_of_pos_hits);
2781                     thr_info->last_tick = now;
2782                 }
2783             }
2784             NlmMutexUnlock(thr_info->callback_mutex);
2785         }
2786     }
2787 #endif
2788     return dummy;
2789 }
2790 
2791 /*
2792 	Make a temporary protein BioseqPtr to use with seg.
2793 */
2794 BioseqPtr
BlastMakeTempProteinBioseq(Uint1Ptr sequence,Int4 length,Uint1 alphabet)2795 BlastMakeTempProteinBioseq (Uint1Ptr sequence, Int4 length, Uint1 alphabet)
2796 
2797 {
2798     BioseqPtr bsp;
2799     Int4 byte_store_length;
2800     Nlm_ByteStorePtr byte_store;
2801     ObjectIdPtr oip;
2802 
2803     if (sequence == NULL || length == 0)
2804         return NULL;
2805 
2806     byte_store = Nlm_BSNew(length);
2807 
2808     byte_store_length = Nlm_BSWrite(byte_store, (VoidPtr) sequence, length);
2809     if (length != byte_store_length) {
2810         Nlm_BSDelete(byte_store, length);
2811         return NULL;
2812     }
2813 
2814     bsp = BioseqNew();
2815     bsp->seq_data = (SeqDataPtr) byte_store;
2816     bsp->length = length;
2817     bsp->seq_data_type = alphabet;
2818     bsp->mol = Seq_mol_aa;
2819     bsp->repr = Seq_repr_raw;
2820 
2821     oip = UniqueLocalId();
2822     ValNodeAddPointer(&(bsp->id), SEQID_LOCAL, oip);
2823     SeqMgrAddToBioseqIndex(bsp);
2824 
2825     return bsp;
2826 }
2827 
2828 
2829 #define LINK_HSP_OVERLAP 9
2830 #define MY_EPS 1.0e-9
2831 /*
2832 	Calculates cutoff scores and returns them.
2833 	Equations provided by Stephen Altschul.
2834 
2835 	BlastSearchBlkPtr search: provides info to perform calculation.
2836 	Int4 subject_length: length of the DB sequence.
2837 	Boolean PNTR ignore_small_gaps: If TRUE, test only for large gaps.
2838 	BLAST_Score PNTR cutoff_s_second: S2 score for second pass.
2839 	BLAST_Score PNTR cutoff_big_gap: Cutoff score for big gaps.
2840 
2841 */
2842 static void
CalculateSecondCutoffScore(BlastSearchBlkPtr search,Int4 subject_length,Boolean PNTR ignore_small_gaps,BLAST_Score PNTR cutoff_s_second,BLAST_Score PNTR cutoff_big_gap)2843 CalculateSecondCutoffScore(BlastSearchBlkPtr search, Int4 subject_length, Boolean PNTR ignore_small_gaps, BLAST_Score PNTR cutoff_s_second, BLAST_Score PNTR cutoff_big_gap)
2844 
2845 {
2846     const Int4 overlap_size = LINK_HSP_OVERLAP;
2847 	Nlm_FloatHi gap_prob, gap_decay_rate, x_variable, y_variable;
2848 	BLAST_KarlinBlkPtr kbp;
2849 	Int4 expected_length, window_size, query_length;
2850 	Int8 search_sp;
2851 
2852 	/* Do this for the first context, should this be changed?? */
2853 	kbp = search->sbp->kbp[search->first_context];
2854 	window_size = search->pbp->gap_size + overlap_size + 1;
2855 	gap_prob = search->pbp->gap_prob;
2856 	gap_decay_rate = search->pbp->gap_decay_rate;
2857 	query_length = search->context[search->first_context].query->length;
2858 
2859 	if (search->pbp->old_stats == FALSE)
2860 	{
2861 	/* Subtract off the expected score. */
2862 	   expected_length = Nint(log(kbp->K*((Nlm_FloatHi) query_length)*((Nlm_FloatHi) subject_length))/(kbp->H));
2863 	   query_length = query_length - expected_length;
2864 	   subject_length = subject_length - expected_length;
2865 	   query_length = MAX(query_length, 1);
2866 	   subject_length = MAX(subject_length, 1);
2867 
2868 	   if (search->dblen > subject_length)
2869 	   	y_variable = log((Nlm_FloatHi) (search->dblen)/(Nlm_FloatHi) subject_length)*(kbp->K)/(gap_decay_rate);
2870 	   else
2871 	   	y_variable = log((Nlm_FloatHi) (subject_length + expected_length)/(Nlm_FloatHi) subject_length)*(kbp->K)/(gap_decay_rate);
2872 	   search_sp = ((Int8) query_length)* ((Int8) subject_length);
2873 	   x_variable = 0.25*y_variable*((FloatHi) search_sp);
2874 
2875 /* To use "small" gaps the query and subject must be "large" compared to
2876 the gap size. If small gaps may be used, then the cutoff values must be
2877 adjusted for the "bayesian" possibility that both large and small gaps are
2878 being checked for. */
2879 
2880 	   if (search_sp > 8*window_size*window_size)
2881 	   {
2882 		x_variable /= (1.0 - gap_prob + MY_EPS);
2883 		*cutoff_big_gap = (BLAST_Score) floor((log(x_variable)/kbp->Lambda)) + 1;
2884 		x_variable = y_variable*(window_size*window_size);
2885 		x_variable /= (gap_prob + MY_EPS);
2886 		*cutoff_s_second= (BLAST_Score) floor((log(x_variable)/kbp->Lambda)) + 1;
2887                 /* Don't allow this cutoff to be too small */
2888                 *cutoff_s_second = MAX(*cutoff_s_second, search->pbp->gap_trigger);
2889 		*ignore_small_gaps = FALSE;
2890 	   }
2891 	   else
2892 	   {
2893 		*cutoff_big_gap = (BLAST_Score) floor((log(x_variable)/kbp->Lambda)) + 1;
2894 		*cutoff_s_second = *cutoff_big_gap;
2895 		*ignore_small_gaps = TRUE;
2896 	   }
2897 	   *cutoff_big_gap *= search->pbp->scalingFactor;
2898 	   *cutoff_s_second *= search->pbp->scalingFactor;
2899 	}
2900 	else
2901 	{
2902 	/* USE the old statistics, for comparison to the OLD BLAST. */
2903 		*cutoff_big_gap = search->pbp->cutoff_s_second;
2904 		*cutoff_s_second = *cutoff_big_gap;
2905 		*ignore_small_gaps = TRUE;
2906 	}
2907 }
2908 
2909 /*
2910 Rounds down score to next even value if appropriate.
2911 */
2912 
2913 static Int2
s_RoundDownOddScores(BLAST_ScoreBlkPtr sbp,BLAST_HitListPtr hitlist)2914 s_RoundDownOddScores(BLAST_ScoreBlkPtr sbp, BLAST_HitListPtr hitlist)
2915 {
2916 	BLAST_HSPPtr PNTR hsp_array;
2917 	Int4 hsp_cnt;
2918 	Int4 index;
2919 
2920 	if (sbp->round_down == FALSE || hitlist->hspcnt == 0)
2921 		return 0;
2922 
2923 	hsp_cnt = hitlist->hspcnt;
2924 	hsp_array = hitlist->hsp_array;
2925 	for (index=0; index<hsp_cnt; index++)
2926 	{
2927                 hsp_array[index]->score -= (hsp_array[index]->score &1);
2928 	}
2929 	return 0;
2930 }
2931 
2932 /*
2933 	This function reevaluates the HSP's from a blast run, checking that
2934 	ambiguity characters, ignored until now, don't change the score or
2935 	extent of the HSP's.
2936 
2937 	Only works for blastn right now.
2938 */
2939 
2940 static Int2
BlastReevaluateWithAmbiguities(BlastSearchBlkPtr search,Int4 sequence_number)2941 BlastReevaluateWithAmbiguities (BlastSearchBlkPtr search, Int4 sequence_number)
2942 
2943 {
2944 	BioseqPtr bsp;
2945 	register BLAST_Score	sum, score;
2946 	register BLAST_ScorePtr PNTR    matrix;
2947 	BLAST_HitListPtr current_hitlist;
2948 	BLAST_HSPPtr PNTR hsp_array;
2949 	Int4 context, hspcnt, hspcnt_max, index, index1, status;
2950 	Int4 length, longest_hsp_length, start, stop;
2951 	Nlm_FloatHi current_evalue=DBL_MAX;
2952 	SeqPortPtr spp=NULL;
2953 	Uint1Ptr nt_seq, nt_seq_start, subject, subject_start, query, old_query_s, old_query_f, new_query_s, new_query_f=NULL;
2954 	Uint1Ptr query_start, query_end, subject_real_start=NULL;
2955         Int4 num_ident;
2956 
2957 /* Only nucl. db's. */
2958 	if (search->prog_number == blast_type_blastp || search->prog_number == blast_type_blastx)
2959 		return 0;
2960 
2961 /* Gapped alignments will be reevaluated anyway.*/
2962 	if (search->pbp->gapped_calculation == TRUE || search->pbp->do_not_reevaluate == TRUE)
2963 		return 0;
2964 
2965 /* No hits to reevaluate. */
2966 	if (search->current_hitlist == NULL || search->current_hitlist->hspcnt == 0)
2967 		return 0;
2968 
2969 /* Check if there are ambiguites at all, return 0 if there are none. */
2970 	if(search->prog_number != blast_type_blastn &&
2971            readdb_ambchar_present(search->rdfp, sequence_number) == FALSE) {
2972 
2973 		return 0;
2974         }
2975 	current_hitlist = search->current_hitlist;
2976 	hspcnt = current_hitlist->hspcnt;
2977 	hspcnt_max = current_hitlist->hspcnt_max;
2978 	hsp_array = current_hitlist->hsp_array;
2979 	matrix = search->sbp->matrix;
2980 
2981 	/* Look for longest HSP. */
2982 	longest_hsp_length = 0;
2983 	for (index=0; index<hspcnt_max; index++)
2984 	{
2985 		if (hsp_array[index] == NULL)
2986 			continue;
2987 
2988 		if (hsp_array[index]->subject.length > longest_hsp_length)
2989 			longest_hsp_length = hsp_array[index]->subject.length;
2990 
2991 		if (current_evalue > hsp_array[index]->evalue)
2992 			current_evalue = hsp_array[index]->evalue;
2993 	}
2994 
2995 	if (StringCmp(search->prog_name, "blastn") != 0)
2996 	{
2997 		longest_hsp_length *= CODON_LENGTH;
2998 	}
2999 
3000 	if (longest_hsp_length > 0)
3001 	{
3002 		nt_seq_start = MemNew(longest_hsp_length*sizeof(Uint1));
3003 		if (nt_seq_start == NULL)
3004 			return 0;
3005 	}
3006 	else
3007 	{
3008 		return longest_hsp_length;
3009 	}
3010 
3011         if (search->thr_info->ambiguities_mutex)
3012             NlmMutexLock(search->thr_info->ambiguities_mutex);
3013 
3014 	bsp = readdb_get_bioseq(search->rdfp, sequence_number);
3015 
3016 	for (index=0; index<hspcnt_max; index++)
3017 	{
3018 		if (hsp_array[index] == NULL)
3019 			continue;
3020 
3021 		context = hsp_array[index]->context;
3022 
3023 		if (StringCmp(search->prog_name, "blastn") == 0)
3024 		{
3025 			start = hsp_array[index]->subject.offset;
3026 			stop = hsp_array[index]->subject.end - 1;
3027 			length = hsp_array[index]->subject.length;
3028 		}
3029 		else
3030 		{	/* Convert for translated alphabet. */
3031 		    if (hsp_array[index]->subject.frame > 0)
3032 		    {
3033 			start = hsp_array[index]->subject.frame - 1 + CODON_LENGTH*(hsp_array[index]->subject.offset);
3034 			stop = start + CODON_LENGTH*(hsp_array[index]->subject.length) - 1;
3035 			length = CODON_LENGTH*(hsp_array[index]->subject.length);
3036 		    }
3037 		    else
3038 		    {
3039 			start = bsp->length - CODON_LENGTH*(hsp_array[index]->subject.offset + hsp_array[index]->subject.length) + hsp_array[index]->subject.frame + 1;
3040 			stop = bsp->length - CODON_LENGTH*(hsp_array[index]->subject.offset) + hsp_array[index]->subject.frame;
3041 			length = CODON_LENGTH*(hsp_array[index]->subject.length);
3042 		     }
3043 		}
3044 
3045 		if (hsp_array[index]->subject.frame > 0)
3046 		{
3047 			spp = SeqPortNew(bsp, start, stop, Seq_strand_plus, Seq_code_ncbi4na);
3048                         SeqPortSet_do_virtual(spp, TRUE);
3049 
3050 		}
3051 		else
3052 		{	/* Offsets correct here?? */
3053 			spp = SeqPortNew(bsp, start, stop, Seq_strand_minus, Seq_code_ncbi4na);
3054                         SeqPortSet_do_virtual(spp, TRUE);
3055 		}
3056 
3057 		if (StringCmp(search->prog_name, "blastn") == 0)
3058 		{
3059 			nt_seq = nt_seq_start;
3060 			while (length > 0)
3061 			{
3062 				*nt_seq = ncbi4na_to_blastna[SeqPortGetResidue(spp)];
3063 				nt_seq++;
3064 				length--;
3065 			}
3066 			subject_start = nt_seq_start;
3067 		}
3068 		else
3069 		{
3070 			nt_seq = nt_seq_start;
3071 			while (length > 0)
3072 			{
3073 				*nt_seq = SeqPortGetResidue(spp);
3074 				nt_seq++;
3075 				length--;
3076 			}
3077 			/* Set frame to one so we start at beginning of nt seq. */
3078 			subject_real_start = GetTranslation(nt_seq_start, CODON_LENGTH*(hsp_array[index]->subject.length), 1, &length, search->db_genetic_code);
3079 			/* The first Residue is a NULLB */
3080 			subject_start = subject_real_start+1;
3081 		}
3082 		spp = SeqPortFree(spp);
3083 
3084 		query_start = (Uint1Ptr) search->context[context].query->sequence;
3085 		query_end = query_start + search->context[context].query->length;
3086 
3087 		score = 0;
3088 		sum = 0;
3089                 num_ident = 0;
3090 		subject = subject_start;
3091 		old_query_s = query_start + hsp_array[index]->query.offset;
3092 		old_query_f = query_start + hsp_array[index]->query.end;
3093 		/* Assume, for now, that the real HSP starts where it does now. */
3094 		new_query_s = old_query_s;
3095 		for (query=old_query_s; query<old_query_f; query++, subject++)
3096 		{
3097                    if (*query == *subject)
3098                       ++num_ident;
3099 
3100 			if ((sum += matrix[*query][*subject]) < 0)
3101 			{
3102 				if (score > 0)
3103 				{
3104 					if (score >= search->pbp->cutoff_s2)
3105 					{
3106 						break;
3107 					}
3108 				}
3109 				score = sum = 0;
3110                                 num_ident = 0;
3111 				new_query_s = new_query_f = query;
3112 			}
3113 			else if (sum > score)
3114 			{	/* Start of scoring regime. */
3115                            if (score == 0)
3116                               new_query_s = query;
3117                            score = sum;
3118                            new_query_f = query+1;
3119 			}
3120 		}
3121 
3122 		if (score >= search->pbp->cutoff_s2)
3123 		{ /* Adjust the information here. */
3124 			hsp_array[index]->score = score;
3125 			hsp_array[index]->query.offset = new_query_s - query_start;
3126 			hsp_array[index]->query.end = new_query_f - query_start;
3127 			hsp_array[index]->query.length = hsp_array[index]->query.end - hsp_array[index]->query.offset;
3128 			hsp_array[index]->subject.offset = hsp_array[index]->subject.offset + new_query_s - old_query_s;
3129 			hsp_array[index]->subject.end = hsp_array[index]->subject.end + new_query_f - old_query_f;
3130 			hsp_array[index]->subject.length = hsp_array[index]->subject.end - hsp_array[index]->subject.offset;
3131                         hsp_array[index]->num_ident = num_ident;
3132 			hsp_array[index]->linked_set = FALSE;
3133 			hsp_array[index]->start_of_chain = FALSE;
3134 			Nlm_MemSet((VoidPtr) &(hsp_array[index]->hsp_link), 0, sizeof(BLAST_HSP_LINK));
3135 			/* Need to NULL out more in HSP? */
3136 		}
3137 		else
3138 		{ /* Delete if this is now below the cutoff score. */
3139 			hsp_array[index] = BLAST_HSPFree(hsp_array[index]);
3140 		}
3141 
3142 		if (StringCmp(search->prog_name, "blastn") != 0)
3143 		{
3144 			subject_real_start = MemFree(subject_real_start);
3145 		}
3146 	}
3147 
3148 	bsp = BioseqFree(bsp);
3149         if (search->thr_info->ambiguities_mutex)
3150                 NlmMutexUnlock(search->thr_info->ambiguities_mutex);
3151 	nt_seq_start = MemFree(nt_seq_start);
3152 
3153 /* Save HSP's again, discarding those that have been NULLed out. */
3154 	index1 = HspArrayPurge(hsp_array, hspcnt_max, TRUE);
3155 	current_hitlist->hspcnt = index1;
3156 	current_hitlist->hspcnt_max = index1;
3157 
3158         s_RoundDownOddScores(search->sbp, search->current_hitlist);
3159 	/* Relink the HSP's, ReReap the Hits. */
3160 	if (!search->pbp->mb_params && search->pbp->do_sum_stats == TRUE) {
3161            status = BlastLinkHsps(search);
3162         } else {
3163            status = BlastGetNonSumStatsEvalue(search);
3164         }
3165         status = BlastReapHitlistByEvalue(search);
3166 
3167 	return status;
3168 }
3169 
3170 /* Auxiliary function to retrieve the virtual oidlist attached to the
3171  * rdfp_chain. Returns a pointer to the OIDList, called should *NOT* modify
3172  * this copy. Assumes that this function is called after BlastProcessGiLists
3173  * has been called (while setting up the search) */
BlastGetVirtualOIDList(ReadDBFILEPtr rdfp_chain)3174 OIDListPtr LIBCALL BlastGetVirtualOIDList(ReadDBFILEPtr rdfp_chain)
3175 {
3176     OIDListPtr virtual_oidlist = NULL;
3177 
3178     while (rdfp_chain) {
3179         if (virtual_oidlist = rdfp_chain->oidlist) {
3180             break;
3181         }
3182         rdfp_chain = rdfp_chain->next;
3183     }
3184     return virtual_oidlist;
3185 }
3186 
/*
	Function to assign chunks of the database to a thread.

	While the "real" part of the database is being searched, the chunk is
	returned through the "start" and "stop" arguments.  Note that this is a
	half-closed interval (stop is not searched).

	Once thr_info->realdb_done is set, the remaining (virtual, mask-defined)
	part is handed out instead: the ordinal ids whose bits are set in the
	virtual oidlist are written to "id_list" and their count to
	"id_list_number" ("start"/"stop" are left untouched in that case).
	"id_list_number" is always reset to 0 on entry.

	thr_info->db_chunk_last keeps track of the last real database number
	assigned, and thr_info->gi_current of the next position in the virtual
	oidlist.  The whole body runs between NlmMutexLockEx(&thr_info->db_mutex)
	and NlmMutexUnlock(thr_info->db_mutex).

	The returned Boolean "done" specifies that the search has already been
	completed (nothing further to hand out).
*/

Boolean BlastGetDbChunk(ReadDBFILEPtr rdfp, Int4Ptr start, Int4Ptr stop,
                     Int4Ptr id_list, Int4Ptr id_list_number,
                     BlastThrInfoPtr thr_info)

{
    Boolean done=FALSE;
    OIDListPtr virtual_oidlist = NULL;
    *id_list_number = 0;

    NlmMutexLockEx(&thr_info->db_mutex);
    if (thr_info->realdb_done) {
        /* Real sequences are exhausted: serve ids from the oidlist mask,
           if there is one (assignment in the condition is intentional). */
        if (virtual_oidlist = BlastGetVirtualOIDList(rdfp)) {
	    /* Virtual database.   Create id_list using mask file */
	    Int4 gi_end       = 0;

	    /* Never scan past the end of the mask. */
	    thr_info->final_db_seq = MIN(thr_info->final_db_seq, virtual_oidlist->total);

	    gi_end = thr_info->final_db_seq;

	    if (thr_info->gi_current < gi_end) {
		Int4 oidindex  = 0;
		Int4 gi_start  = thr_info->gi_current;
		/* Offset of the first id inside its mask word; only the first
		   word of this chunk can start mid-word. */
		Int4 bit_start = gi_start % MASK_WORD_SIZE;
		Int4 gi;

		/* One iteration per mask word.  The chunk-size limit is only
		   tested between words, so a chunk may exceed db_chunk_size by
		   the ids found in the last word processed; id_list must be
		   large enough for that overshoot. */
		for(gi = gi_start; (gi < gi_end) && (oidindex < thr_info->db_chunk_size);) {
		    /* Last bit (exclusive) to examine in this word: either the
		       end of the word or the bit corresponding to gi_end. */
		    Int4 bit_end = ((gi_end - gi + bit_start) < MASK_WORD_SIZE) ? (gi_end - gi + bit_start) : MASK_WORD_SIZE;
		    Int4 bit;

		    Uint4 mask_index = gi / MASK_WORD_SIZE;
		    /* NOTE(review): presumably converts the stored word to host
		       byte order - confirm against Nlm_SwapUint4. */
		    Uint4 mask_word  = Nlm_SwapUint4(virtual_oidlist->list[mask_index]);

		    /* Skip words with no bits set without testing each bit. */
		    if ( mask_word ) {
			for(bit = bit_start; bit<bit_end; bit++) {
			    /* Bit 0 is the most significant bit of the word. */
			    Uint4 bitshift = (MASK_WORD_SIZE-1)-bit;

			    if ((mask_word >> bitshift) & 1) {
				/* (gi - bit_start) is the id of bit 0 of this word. */
				id_list[ oidindex++ ] = (gi - bit_start) + bit;
			    }
			}
		    }

		    /* Advance to the first id of the next word. */
		    gi += bit_end - bit_start;
		    bit_start = 0;
		}

		thr_info->gi_current = gi;
		*id_list_number = oidindex;
		BlastTickProc(thr_info->gi_current/32, thr_info);
	    } else {
		/* The whole mask range has already been handed out. */
		done = TRUE;
	    }

	} else {
	    /* No virtual oidlist and the real part is finished. */
	    done = TRUE;
        }
    } else {
	int real_readdb_entries;
	int total_readdb_entries;
	int final_real_seq;

	real_readdb_entries  = readdb_get_num_entries_total_real(rdfp);
	total_readdb_entries = readdb_get_num_entries_total(rdfp);
	/* End (exclusive) of the range to be searched in the real part. */
	final_real_seq       = MIN( real_readdb_entries, thr_info->final_db_seq );

	/* we have real database with start/stop specified */
        if (thr_info->db_mutex) {
            /* Mutex present: hand out successive chunks of at most
               db_chunk_size sequences starting at db_chunk_last. */
            /* Emit a tick if needed. */
            BlastTickProc(thr_info->db_chunk_last, thr_info);
            *start = thr_info->db_chunk_last;
            if (thr_info->db_chunk_last < final_real_seq) {
                *stop = MIN((thr_info->db_chunk_last +
                    thr_info->db_chunk_size), final_real_seq);
            } else {/* Already finished. */
                /* Empty interval (start == stop); "done" stays FALSE so the
                   next call goes through the realdb_done branch above. */
                *stop = thr_info->db_chunk_last;

		/* Change parameters for oidlist processing. */
                thr_info->realdb_done  = TRUE;
            }
            thr_info->db_chunk_last = *stop;
        } else {
            /* No mutex: the whole real range is handed out in one piece.
               This branch reads the caller's *stop, i.e. it relies on the
               value left there by the previous call to detect that the
               range has already been returned. */
            if (*stop != final_real_seq) {
                done = FALSE;
                *start = thr_info->last_db_seq;
                *stop  = final_real_seq;
            } else {
                thr_info->realdb_done = TRUE;

		if (total_readdb_entries == real_readdb_entries) {
		    /* No virtual entries beyond the real ones. */
		    done = TRUE;
		} else {
		    /* Continue with the oidlist from where the real part ended. */
		    thr_info->gi_current = final_real_seq;
		}
            }
        }
    }

    NlmMutexUnlock(thr_info->db_mutex);
    return done;
}
3299 
3300 static VoidPtr
do_gapped_blast_search(VoidPtr ptr)3301 do_gapped_blast_search(VoidPtr ptr)
3302 
3303 {
3304     BlastSearchBlkPtr search;
3305     Int2 status=0;
3306     Int4 index, index1, start=0, stop=0, id_list_length;
3307     Int4Ptr id_list=NULL;
3308     Uint4 i; /* AM: Support for query concatenation. */
3309 
3310     search = (BlastSearchBlkPtr) ptr;
3311     if (search->thr_info->blast_gi_list || BlastGetVirtualOIDList(search->rdfp))
3312     {                                     /* FIXME: magic constant? */
3313 	id_list = MemNew((search->thr_info->db_chunk_size+33)*sizeof(Int4));
3314     }
3315 
3316     while (BlastGetDbChunk(search->rdfp, &start, &stop, id_list,
3317                            &id_list_length, search->thr_info) != TRUE)
3318     {
3319         if (id_list && id_list_length)
3320         {
3321             for (index=0; index<id_list_length; index++)
3322             {
3323                 index1 = id_list[index];
3324                 if ((status =
3325                      BLASTPerformSearchWithReadDb(search, index1)) != 0)
3326                     break;
3327 
3328                 if (search->pbp->do_sum_stats) {
3329                     status = BlastLinkHsps(search);
3330                 }
3331                 status = BlastReapHitlistByEvalue(search);
3332                 if (search->handle_results)
3333                     search->handle_results((VoidPtr) search);
3334                 else
3335                     BlastSaveCurrentHitlist(search);
3336                 /* Emit a tick if needed and we're not MT. */
3337                 if (search->thr_info->db_mutex == NULL)
3338                     BlastTickProc(index1, search->thr_info);
3339                 if (time_out_boolean == TRUE)
3340                     break;
3341             }
3342         } else if (!search->thr_info->realdb_done) {
3343             for (index=start; index<stop; index++)
3344             {
3345                 if ((status = BLASTPerformSearchWithReadDb(search, index)) != 0)
3346                     break;
3347 
3348                 /* AM: Support for query concatenation. */
3349         if( !search->mult_queries )
3350         {
3351             if (search->pbp->do_sum_stats) {
3352                 status = BlastLinkHsps(search);
3353             }
3354             status = BlastReapHitlistByEvalue(search);
3355             if (search->handle_results)
3356                 search->handle_results((VoidPtr) search);
3357             else
3358                 BlastSaveCurrentHitlist(search);
3359         }
3360 		else /* AM: Support for query concatenation. */
3361 		{
3362 		  InitHitLists( search );
3363 		  search->mult_queries->use_mq = TRUE;
3364 		  search->mult_queries->delete_current_hitlist = FALSE;
3365 
3366                   for (i = 0;  i < search->mult_queries->NumQueries;  ++i) {
3367                       search->mult_queries->current_query = i;
3368 
3369                       if (search->pbp->do_sum_stats) {
3370                           status = BlastLinkHsps(search);
3371                       }
3372                       status = BlastReapHitlistByEvalue(search);
3373 
3374                       if (search->handle_results)
3375                           search->handle_results( (VoidPtr)search );
3376                       else
3377                           BlastSaveCurrentHitlist(search);
3378                   }
3379 
3380 		  if( search->mult_queries->delete_current_hitlist )
3381 		  {
3382 		    search->current_hitlist
3383 		      = BlastHitListDestruct( search->current_hitlist );
3384 		  }
3385 
3386 		  search->mult_queries->use_mq = FALSE;
3387                   BlastHitListPurge( search->current_hitlist );
3388 		}
3389 
3390                 /* Emit a tick if needed and we're not MT. */
3391                 if (search->thr_info->db_mutex == NULL)
3392                     BlastTickProc(index, search->thr_info);
3393                 if (time_out_boolean == TRUE)
3394                     break;
3395             }
3396         }
3397         /* Get out if "stop" was the last seq. */
3398         if (time_out_boolean || status)
3399             break;
3400     }
3401 
3402     if (id_list)
3403         id_list = MemFree(id_list);
3404 
3405     return (VoidPtr) search;
3406 }
3407 
/* Search loop used when do_the_blast_run does not select
   do_gapped_blast_search (i.e. the search is not gapped, or the program is
   "blastn").  "ptr" is the BlastSearchBlkPtr to work on and is also the
   return value.  Chunks are requested from BlastGetDbChunk until it reports
   completion, a step returns a non-zero status, or time_out_boolean is set.
   Megablast searches (search->pbp->mb_params set) use their own
   reevaluate/save routines and free subject->sequence_start after every
   subject sequence. */
static VoidPtr
do_blast_search(VoidPtr ptr)

{
    BlastSearchBlkPtr search;
    Int2 status = 0;
    Int4 index, index1, start=0, stop=0, id_list_length;
    Int4Ptr id_list=NULL;
    Uint4 i; /* AM: Query multiplexing. */

    search = (BlastSearchBlkPtr) ptr;
    /* An id list is only needed for gi-list or oidlist-restricted searches. */
	if (search->thr_info->blast_gi_list || BlastGetVirtualOIDList(search->rdfp))
    {                                     /* FIXME: magic constant? */
        /* The 33 extra slots give BlastGetDbChunk room for the ids it may
           add beyond db_chunk_size while finishing a mask word. */
        id_list = MemNew((search->thr_info->db_chunk_size+33)
                         *sizeof(Int4));
    }

    while (BlastGetDbChunk(search->rdfp, &start, &stop, id_list,
			   &id_list_length, search->thr_info) != TRUE) {
        if (search->thr_info->realdb_done && id_list) {
            /* Virtual part of the database: the chunk is an explicit list
               of ordinal ids. */
            for (index=0; index<id_list_length; index++) {
                index1 = id_list[index];
                if ((status = BLASTPerformSearchWithReadDb(search, index1))
                    != 0)
                   break;
                s_RoundDownOddScores(search->sbp, search->current_hitlist);
                if (!search->pbp->mb_params) {
                   /* Only the status of the last step executed survives;
                      earlier values are overwritten. */
                   if (search->pbp->do_sum_stats == TRUE)
                      status = BlastLinkHsps(search);
                   else
                      status = BlastGetNonSumStatsEvalue(search);
                   status = BlastReapHitlistByEvalue(search);
                   /* Reevaluation is skipped when a results callback is
                      installed. */
                   if (!search->handle_results)
                      status = BlastReevaluateWithAmbiguities(search, index1);
                } else {
                   MegaBlastReevaluateWithAmbiguities(search);
                }

                /* Hand the hitlist to the callback, or save it. */
                if (search->handle_results)
                    search->handle_results((VoidPtr) search);
		else if (!search->pbp->mb_params)
		   BlastSaveCurrentHitlist(search);
                else
		   MegaBlastSaveCurrentHitlist(search);
                if (search->pbp->mb_params)
                   /* Free the ncbi4na-encoded sequence */
                   search->subject->sequence_start = (Uint1Ptr)
                      MemFree(search->subject->sequence_start);
                /* Emit a tick if needed and we're not MT. */
                if (search->thr_info->db_mutex == NULL)
                    BlastTickProc(index1, search->thr_info);
                if (time_out_boolean == TRUE)
                    break;
            }
        } else if (!search->thr_info->realdb_done) {
            /* Real part of the database: the chunk is the half-open range
               [start, stop). */
            for (index=start; index<stop; index++) {
                if ((status = BLASTPerformSearchWithReadDb(search, index))
                    != 0)
                   break;
                s_RoundDownOddScores(search->sbp, search->current_hitlist);
                if (!search->pbp->mb_params) {
                   if (search->pbp->do_sum_stats == TRUE)
                      status = BlastLinkHsps(search);
                   else
                      status = BlastGetNonSumStatsEvalue(search);
                   status = BlastReapHitlistByEvalue(search);
                   if (!search->handle_results)
                      status = BlastReevaluateWithAmbiguities(search, index);
                } else {
                   MegaBlastReevaluateWithAmbiguities(search);
                }
                if (search->handle_results)
                   search->handle_results((VoidPtr) search);
		else if (!search->pbp->mb_params)
		{ /* AM: Query multiplexing. */
		  if( !search->mult_queries )
		    BlastSaveCurrentHitlist(search);
                  else
		  {
		    /* Save the hitlist once for every multiplexed query;
		       unlike the id-list loop above, only this loop handles
		       mult_queries. */
		    InitHitLists( search );
		    search->mult_queries->use_mq = TRUE;
		    search->mult_queries->delete_current_hitlist = FALSE;

		    for( i = 0; i < search->mult_queries->NumQueries; ++i )
		    {
		      search->mult_queries->current_query = i;
		      BlastSaveCurrentHitlist(search);
		    }

		    /* The flag is reset to FALSE before the loop, so it can
		       only be TRUE here if something called during the loop
		       set it. */
		    if( search->mult_queries->delete_current_hitlist )
		    {
		      search->current_hitlist
		        = BlastHitListDestruct( search->current_hitlist );
		    }

		    search->mult_queries->use_mq = FALSE;
		    BlastHitListPurge( search->current_hitlist );
		  }
                }
                else
		   MegaBlastSaveCurrentHitlist(search);

                if (search->pbp->mb_params)
                   /* Free the ncbi4na-encoded sequence */
                   search->subject->sequence_start = (Uint1Ptr)
                      MemFree(search->subject->sequence_start);
                /* Emit a tick if needed and we're not MT. */
                if (search->thr_info->db_mutex == NULL)
                    BlastTickProc(index, search->thr_info);
                if (time_out_boolean == TRUE)
                    break;
            }
        }

        /* Stop asking for chunks after a timeout or a non-zero status. */
        if (time_out_boolean || status)
            break;
    }

    if (id_list)
        id_list = MemFree(id_list);

    return (VoidPtr) search;
}
3532 
/* Run the database search for "search" (no-op when search is NULL).
   Initializes the chunking state in search->thr_info, then either
     - creates search->pbp->process_num threads, each running
       do_gapped_blast_search or do_blast_search on its own search block
       (the original block plus duplicates), joins them and destroys the
       duplicates, or
     - calls the selected search function directly in the current thread.
   Afterwards the database files are closed if READDB_CONTENTS_ALLOCATED is
   set, and a timeout is reported through search->error_return and
   search->timed_out when time_out_boolean is set. */
void LIBCALL
do_the_blast_run(BlastSearchBlkPtr search)

{
    BlastSearchBlkPtr PNTR array;
    Char buffer[256];
    Int2 index;
    TNlmThread PNTR thread_array;
    VoidPtr status=NULL;
    int num_entries_total;
    int num_entries_total_real;
    int start_seq;
    int end_seq;
    Int4 i; /* AM: query concatenation */

    if (search == NULL)
        return;

    num_entries_total      = readdb_get_num_entries_total     (search->rdfp);
    num_entries_total_real = readdb_get_num_entries_total_real(search->rdfp);

    /* Set 'done with read db' according to whether real databases are present */

    if (num_entries_total_real) {
	search->thr_info->realdb_done = FALSE;
    } else {
	search->thr_info->realdb_done = TRUE;
    }

    /* Make sure first, last sequence indices are in-range (0, NUM-1) */

    /* NOTE: search->pbp->final_seq is 1 beyond the last sequence ordinal id,
       except when it's <=0, which means search to the last sequence in the
       database. */
    /* search->thr_info versions are not. */

    if (search->pbp->final_db_seq > 0) {
	end_seq = MIN(search->pbp->final_db_seq, num_entries_total);
    } else {
	end_seq = num_entries_total;
    }

    /* Clamp the first sequence into [0, end_seq]. */
    start_seq = MAX(0, MIN(search->pbp->first_db_seq, end_seq));

    /* Set BlastGetDbChunk()'s pointers and counters */

    search->thr_info->last_db_seq       =
	search->thr_info->gi_current    =
	search->thr_info->db_chunk_last = start_seq;

    search->thr_info->final_db_seq = end_seq;

    ConfigureDbChunkSize(search, search->dbseq_num);

    if (NlmThreadsAvailable() && search->pbp->process_num > 1) {
        /* Multi-threaded run.  A non-NULL db_mutex is also what
           BlastGetDbChunk and the search loops use to detect MT mode. */
        NlmMutexInit(&search->thr_info->db_mutex);
        NlmMutexInit(&search->thr_info->results_mutex);
        NlmMutexInit(&search->thr_info->ambiguities_mutex);

        /* Slot 0 is the caller's search block; the other slots are
           duplicates of it. */
        array = (BlastSearchBlkPtr PNTR) MemNew((search->pbp->process_num)*sizeof(BlastSearchBlkPtr));
        array[0] = search;
        for (index=1; index<search->pbp->process_num; index++) {
            array[index] = BlastSearchBlkDuplicate(search);
            if (array[index] == NULL) {
               /* Duplication failed: continue with the blocks obtained so far. */
               search->pbp->process_num = index;
               ErrPostEx(SEV_WARNING, 0, 0, "Number of threads reduced to %d", index);
               break;
            }
        }

        thread_array = (TNlmThread PNTR) MemNew((search->pbp->process_num)*sizeof(TNlmThread));
        for (index=0; index<search->pbp->process_num; index++) {
            if (search->pbp->gapped_calculation && StringCmp(search->prog_name, "blastn") != 0)
                thread_array[index] = NlmThreadCreateEx(do_gapped_blast_search, (VoidPtr) array[index], THREAD_RUN|THREAD_BOUND, eTP_Default, NULL, NULL);
            else
                thread_array[index] = NlmThreadCreateEx(do_blast_search, (VoidPtr) array[index], THREAD_RUN|THREAD_BOUND, eTP_Default, NULL, NULL);

            /* A failed creation is only reported; the loop goes on and the
               entry is still passed to NlmThreadJoin below. */
            if (NlmThreadCompare(thread_array[index], NULL_thread)) {
                ErrPostEx(SEV_ERROR, 0, 0, "Unable to open thread.");
            }
        }

        /* Wait for all workers to finish. */
        for (index=0; index<search->pbp->process_num; index++) {
            NlmThreadJoin(thread_array[index], &status);
        }

        /* Fold the duplicates back into the original block and release
           them; slot 0 (the caller's block) is kept. */
        for (index=1; index<search->pbp->process_num; index++) {
#ifdef BLAST_COLLECT_STATS
            search->first_pass_hits += array[index]->first_pass_hits;
            search->second_pass_hits += array[index]->second_pass_hits;
            search->second_pass_trys += array[index]->second_pass_trys;
            search->first_pass_extends += array[index]->first_pass_extends;
            search->second_pass_extends += array[index]->second_pass_extends;
            search->first_pass_good_extends += array[index]->first_pass_good_extends;
            search->second_pass_good_extends += array[index]->second_pass_good_extends;
            search->number_of_seqs_better_E += array[index]->number_of_seqs_better_E;
            search->prelim_gap_no_contest += array[index]->prelim_gap_no_contest;
            search->prelim_gap_passed += array[index]->prelim_gap_passed;
            search->prelim_gap_attempts += array[index]->prelim_gap_attempts;
            search->real_gap_number_of_hsps += array[index]->real_gap_number_of_hsps;
#endif

            if( array[index]->mult_queries ) { /* AM: query concatenation: free resources */
                if( array[index]->mult_queries->HitListArray )
                    for( i = 0; i < array[index]->mult_queries->NumQueries; ++i )
                        BlastHitListDestruct( array[index]->mult_queries->HitListArray[i] );

                MemFree( array[index]->mult_queries->HitListArray );
                MemFree( array[index]->mult_queries );
            }
            /* Not copied at thread start. */
            array[index] = BlastSearchBlkDestruct(array[index]);
        }
        array = MemFree(array);

        thread_array = MemFree(thread_array);

        /* Destroy the mutexes and reset the pointers to NULL. */
        NlmMutexDestroy(search->thr_info->db_mutex);
        search->thr_info->db_mutex = NULL;
        NlmMutexDestroy(search->thr_info->results_mutex);
        search->thr_info->results_mutex = NULL;
        NlmMutexDestroy(search->thr_info->ambiguities_mutex);
        search->thr_info->ambiguities_mutex = NULL;
    } else {
        /* Single-threaded run in the calling thread. */
        if (search->pbp->gapped_calculation && StringCmp(search->prog_name, "blastn") != 0)
            do_gapped_blast_search((VoidPtr) search);
        else
            do_blast_search((VoidPtr) search);
    }
    if (search->rdfp->parameters & READDB_CONTENTS_ALLOCATED)
        search->rdfp = ReadDBCloseMHdrAndSeqFiles(search->rdfp);
    if (time_out_boolean) {
        /* Report the timeout to the caller. */
        sprintf(buffer, "CPU limit exceeded");
        BlastConstructErrorMessage("Blast", buffer, 2, &(search->error_return));
        search->timed_out = TRUE;
    }

    return;
}
3672 
3673 Uint1
FrameToDefine(Int2 frame)3674 FrameToDefine(Int2 frame)
3675 
3676 {
3677 	Uint1 retval;
3678 
3679 	switch (frame) {
3680 		case -1:
3681 			retval = SEQLOC_MASKING_MINUS1;
3682 			break;
3683 		case -2:
3684 			retval = SEQLOC_MASKING_MINUS2;
3685 			break;
3686 		case -3:
3687 			retval = SEQLOC_MASKING_MINUS3;
3688 			break;
3689 		case 1:
3690 			retval = SEQLOC_MASKING_PLUS1;
3691 			break;
3692 		case 2:
3693 			retval = SEQLOC_MASKING_PLUS2;
3694 			break;
3695 		case 3:
3696 			retval = SEQLOC_MASKING_PLUS3;
3697 			break;
3698 		default:
3699 			retval = SEQLOC_MASKING_NOTSET;
3700 			break;
3701 	}
3702 
3703 	return retval;
3704 }
3705 Int2
DefineToFrame(Uint1 define)3706 DefineToFrame(Uint1 define)
3707 
3708 {
3709     Int2 frame;
3710 
3711     switch (define) {
3712     case SEQLOC_MASKING_MINUS1:
3713         frame = -1;
3714         break;
3715     case SEQLOC_MASKING_MINUS2:
3716         frame = -2;
3717         break;
3718     case SEQLOC_MASKING_MINUS3:
3719         frame = -3;
3720         break;
3721     case SEQLOC_MASKING_PLUS1:
3722         frame = 1;
3723         break;
3724     case SEQLOC_MASKING_PLUS2:
3725         frame = 2;
3726         break;
3727     case SEQLOC_MASKING_PLUS3:
3728         frame = 3;
3729         break;
3730     case SEQLOC_MASKING_NOTSET:
3731     default:
3732         frame = 0;
3733         break;
3734     }
3735 
3736     return frame;
3737 }
3738 
3739 CharPtr
BlastConstructFilterString(Int4 filter_value)3740 BlastConstructFilterString(Int4 filter_value)
3741 
3742 {
3743 	Char buffer[32];
3744 	CharPtr ptr;
3745 
3746 	ptr = buffer;
3747 
3748 	if (filter_value == FILTER_NONE)
3749 		return NULL;
3750 
3751 	if (filter_value & FILTER_DUST)
3752 	{
3753 		*ptr = 'D'; ptr++;
3754 		*ptr = ';'; ptr++;
3755 	}
3756 
3757 	if (filter_value & FILTER_SEG)
3758 	{
3759 		*ptr = 'S'; ptr++;
3760 		*ptr = ';'; ptr++;
3761 	}
3762 
3763 	*ptr = NULLB;
3764 
3765 	return StringSave(buffer);
3766 }
3767 
3768 void
HackSeqLocId(SeqLocPtr slp,SeqIdPtr id)3769 HackSeqLocId(SeqLocPtr slp, SeqIdPtr id)
3770 {
3771 	if (slp == NULL) {
3772 		return;
3773 	}
3774 	switch (slp->choice) {
3775 	case SEQLOC_BOND:
3776 	case SEQLOC_FEAT:
3777 		/* unsupported */
3778 		/* assert(0); */
3779 		break;
3780 	case SEQLOC_NULL:
3781 	case SEQLOC_EMPTY:
3782 		break;
3783 	case SEQLOC_WHOLE:
3784 		SeqIdSetFree((SeqIdPtr)slp->data.ptrvalue);
3785 		slp->data.ptrvalue = SeqIdDup(id);
3786 		break;
3787 	case SEQLOC_EQUIV:
3788 	case SEQLOC_MIX:
3789 	case SEQLOC_PACKED_INT:
3790 		slp = (SeqLocPtr)slp->data.ptrvalue;
3791 		for (; slp != NULL; slp = slp->next) {
3792 			HackSeqLocId(slp, id);
3793 		}
3794 		break;
3795 	case SEQLOC_INT:
3796 		SeqIdSetFree(((SeqIntPtr)slp->data.ptrvalue)->id);
3797 		((SeqIntPtr)slp->data.ptrvalue)->id = SeqIdDup(id);
3798 		break;
3799 	case SEQLOC_PNT:
3800 		SeqIdSetFree(((SeqPntPtr)slp->data.ptrvalue)->id);
3801 		((SeqPntPtr)slp->data.ptrvalue)->id = SeqIdDup(id);
3802 		break;
3803 	case SEQLOC_PACKED_PNT:
3804 		SeqIdSetFree(((PackSeqPntPtr)slp->data.ptrvalue)->id);
3805 		((PackSeqPntPtr)slp->data.ptrvalue)->id = SeqIdDup(id);
3806 		break;
3807 	/* default:
3808 		assert(0); */
3809 	}
3810 }
3811 /* This function duplicates a SEQLOC_PACKED_INT or a SEQLOC_INT type of SeqLoc */
blastDuplicateSeqLocInt(SeqLocPtr slp_head)3812 static SeqLocPtr blastDuplicateSeqLocInt(SeqLocPtr slp_head)
3813 {
3814     SeqLocPtr dup_slp, slp, dup_head = NULL;
3815     SeqIntPtr sqip;
3816 
3817     if(slp_head == NULL)
3818         return NULL;
3819 
3820     /* First seqLoc in lower level */
3821 
3822     if (slp_head->choice == SEQLOC_PACKED_INT) {
3823        slp = slp_head->data.ptrvalue;
3824        dup_head = ValNodeNew(NULL);
3825        dup_head->choice = slp_head->choice;
3826     } else if (slp_head->choice == SEQLOC_INT) {
3827        slp = slp_head;
3828     } else {
3829        return NULL;
3830     }
3831     sqip = slp->data.ptrvalue;
3832 
3833     /* Top level SeqLoc */
3834 
3835     dup_slp = (VoidPtr) SeqLocIntNew(sqip->from, sqip->to, sqip->strand, sqip->id);
3836     if (dup_head)
3837        dup_head->data.ptrvalue = dup_slp;
3838     else
3839        dup_head = dup_slp;
3840 
3841     /* Loop over all SeqIntPtr s in this SeqLoc */
3842     for(slp = slp->next; slp != NULL; slp = slp->next) {
3843         sqip = slp->data.ptrvalue;
3844         dup_slp->next = (VoidPtr) SeqLocIntNew(sqip->from, sqip->to, sqip->strand, sqip->id);
3845         dup_slp = dup_slp->next;
3846     }
3847 
3848     return dup_head;
3849 }
3850 /* This function use PACKED INT as mask */
BLASTUpdateSeqIdInSeqInt(SeqLocPtr mask,SeqIdPtr sip)3851 void BLASTUpdateSeqIdInSeqInt(SeqLocPtr mask, SeqIdPtr sip)
3852 {
3853     SeqLocPtr slp;
3854     SeqIntPtr sintp;
3855 
3856     if(mask == NULL)
3857         return;
3858 
3859     for(slp = mask->data.ptrvalue; slp != NULL; slp = slp->next) {
3860         if(slp->choice != SEQLOC_INT)
3861             continue;
3862         sintp = (SeqIntPtr)slp->data.ptrvalue;
3863         SeqIdSetFree(sintp->id);
3864         sintp->id = SeqIdDup(sip);
3865     }
3866     return;
3867 }
3868 
3869 /* Adjust offsets in the mask locations list; discard locations outside of
3870    the range */
3871 static SeqLocPtr
AdjustOffsetsInMaskLoc(SeqLocPtr mask_loc,Int4 start,Int4 end)3872 AdjustOffsetsInMaskLoc(SeqLocPtr mask_loc, Int4 start, Int4 end)
3873 {
3874    SeqLocPtr slp, last_slp = NULL, next_slp, head = NULL;
3875    SeqIntPtr loc;
3876 
3877    if (!mask_loc)
3878       return NULL;
3879 
3880    if (mask_loc->choice == SEQLOC_PACKED_INT)
3881       slp = (SeqLocPtr) mask_loc->data.ptrvalue;
3882    else if (mask_loc->choice == SEQLOC_INT)
3883       slp = mask_loc;
3884    else /* Should be impossible */
3885       return NULL;
3886 
3887    while (slp) {
3888       if (slp->choice == SEQLOC_INT) {
3889          loc = (SeqIntPtr) slp->data.ptrvalue;
3890          loc->from = MAX(loc->from, start);
3891          loc->to = MIN(loc->to, end);
3892          if (loc->from >= loc->to) {
3893             /* This mask location does not intersect the interval.
3894                Remove it. */
3895             next_slp = slp->next;
3896             SeqLocFree(slp);
3897             slp = next_slp;
3898          } else {
3899             if (last_slp) {
3900                last_slp->next = slp;
3901             } else {
3902                head = slp;
3903             }
3904             last_slp = slp;
3905             slp = slp->next;
3906          }
3907       } else {
3908          next_slp = slp->next;
3909          SeqLocFree(slp);
3910          slp = next_slp;
3911       }
3912    }
3913    if (last_slp)
3914       last_slp->next = NULL;
3915 
3916    if (mask_loc->choice == SEQLOC_PACKED_INT) {
3917       mask_loc->data.ptrvalue = head;
3918       /* If there are no locations left, free the packed-int and
3919          return NULL. */
3920       if (!head)
3921          mask_loc = ValNodeFree(mask_loc);
3922       return mask_loc;
3923    } else {
3924       return head;
3925    }
3926 }
3927 
3928 /* This function use PACKED INT as slp2 */
blastMergeFilterLocs(SeqLocPtr slp1,SeqLocPtr slp2,Boolean translate,Int2 frame,Int4 length)3929 SeqLocPtr blastMergeFilterLocs(SeqLocPtr slp1, SeqLocPtr slp2, Boolean translate,
3930                                Int2 frame, Int4 length)
3931 {
3932 
3933     SeqLocPtr slp, dup_slp, dup_head;
3934 
3935     if(slp1 == NULL && slp2 == NULL)
3936         return NULL;
3937 
3938     if(slp2 == NULL)
3939         return slp1;
3940 
3941     if (slp2->choice == SEQLOC_PACKED_INT || slp2->choice == SEQLOC_INT) {
3942         dup_slp = blastDuplicateSeqLocInt(slp2);
3943     }
3944     else if (slp2->choice == SEQLOC_MIX) {
3945         /* for mixed seqlocs, recursively flatten all the internal
3946            seqloc components into a single seqloc_int */
3947         SeqLocPtr list_slp = slp2;
3948         dup_slp = NULL;
3949         while (list_slp != NULL) {
3950             if (list_slp->choice == SEQLOC_MIX) {
3951                 dup_slp = blastMergeFilterLocs(dup_slp, list_slp->data.ptrvalue,
3952                                            FALSE, frame, length);
3953             }
3954             else {
3955                 dup_slp = blastMergeFilterLocs(dup_slp, list_slp,
3956                                            FALSE, frame, length);
3957             }
3958             list_slp = list_slp->next;
3959         }
3960     }
3961     else {
3962         ErrPostEx(SEV_FATAL, 1, 0, "Duplication of SeqLoc failed\n");
3963     }
3964 
3965     /* Request to translate means, that slp2 is DNA SeqLoc, that should be
3966        translated into protein SeqLoc corresponding to the specific frame */
3967 
3968     if(translate) {
3969         BlastConvertDNASeqLoc(dup_slp, frame, length);
3970     }
3971 
3972     if(slp1 == NULL) {
3973         return dup_slp;
3974     }
3975 
3976     /* OK We have 2 not NULL filters - merging... */
3977 
3978     if(slp1->choice == SEQLOC_PACKED_INT)
3979         slp = (SeqLocPtr) slp1->data.ptrvalue;
3980     else
3981         slp = slp1;
3982 
3983     if (dup_slp->choice == SEQLOC_PACKED_INT) {
3984        dup_head = dup_slp;
3985        dup_slp = (SeqLocPtr) dup_slp->data.ptrvalue;
3986        MemFree(dup_head);
3987     }
3988 
3989     if(slp == NULL) {
3990         ErrPostEx(SEV_WARNING, 0, 0, "Invalid filter detected");
3991         slp1->data.ptrvalue = dup_slp;
3992     }
3993     else
3994     {
3995     	while(slp->next != NULL)
3996            slp = slp->next;
3997 
3998     	slp->next = dup_slp;
3999      }
4000 
4001     return slp1;
4002 }
4003 
4004 /* This function is used to filter one frame of the translated DNA
4005    sequence */
rpsBlastFilterSequence(BlastSearchBlkPtr search,Int4 frame,Uint1Ptr sequence,Int4 prot_length,Int4 dna_length)4006 static void rpsBlastFilterSequence(BlastSearchBlkPtr search, Int4 frame,
4007                                    Uint1Ptr sequence, Int4 prot_length,
4008                                    Int4 dna_length)
4009 {
4010     BioseqPtr bsp_temp;
4011     Boolean mask_at_hash = FALSE;
4012     SeqLocPtr filter_slp = NULL;
4013 
4014     if(search->pbp->query_lcase_mask == NULL) {
4015         if(search->pbp->filter_string == NULL || !StringICmp(search->pbp->filter_string, "F"))
4016             return;                 /* No filtering */
4017     }
4018 
4019     bsp_temp = BlastMakeTempProteinBioseq(sequence+1, prot_length,
4020                                           Seq_code_ncbistdaa);
4021 
4022     filter_slp = BlastBioseqFilterEx(bsp_temp, search->pbp->filter_string,
4023                                      &mask_at_hash);
4024     HackSeqLocId(filter_slp, search->subject_info->sip);
4025 
4026     if(search->pbp->query_lcase_mask != NULL) {
4027         filter_slp = blastMergeFilterLocs(filter_slp, search->pbp->query_lcase_mask, TRUE, frame, dna_length);
4028     }
4029 
4030     /* SeqMgrDeleteFromBioseqIndex(bsp_temp); */
4031 
4032     /* bsp_temp->id = SeqIdSetFree(bsp_temp->id); */
4033     bsp_temp = BioseqFree(bsp_temp);
4034 
4035     BlastMaskTheResidues(sequence+1, prot_length, 21, filter_slp, FALSE, 0);
4036 
4037     /* Conversion to ProteinSeqLoc will be done after original SeqLoc will
4038        be used once again on the Gapped extention stage */
4039 
4040     /*    BlastConvertProteinSeqLoc(filter_slp, frame, dna_length); */
4041 
4042     if (filter_slp)
4043         ValNodeAddPointer(&(search->mask), FrameToDefine(frame), filter_slp);
4044 
4045     return;
4046 }
4047 BlastSequenceBlkPtr PNTR LIBCALL
BlastMakeCopyQueryDNAP(BlastSequenceBlkPtr PNTR bsbpp_in)4048 BlastMakeCopyQueryDNAP(BlastSequenceBlkPtr PNTR bsbpp_in)
4049 {
4050     BlastSequenceBlkPtr PNTR bsbpp;
4051     Int4 buff_size, m;
4052 
4053     if(bsbpp_in == NULL)
4054         return NULL;
4055 
4056     bsbpp = MemNew(sizeof(BlastSequenceBlkPtr)*2);
4057     for(m = 0; m < 2; m++) {
4058        if (bsbpp_in[m]) {
4059           bsbpp[m] = (BlastSequenceBlkPtr) MemNew(sizeof(BlastSequenceBlk));
4060 
4061           buff_size = bsbpp_in[m]->length+3*CODON_LENGTH;
4062           bsbpp[m]->sequence_start = MemNew(buff_size);
4063 
4064           MemCpy(bsbpp[m]->sequence_start,
4065                  bsbpp_in[m]->sequence_start, buff_size);
4066 
4067           bsbpp[m]->sequence = bsbpp_in[m]->sequence;
4068 
4069           bsbpp[m]->length = bsbpp_in[m]->length;
4070           bsbpp[m]->original_length = bsbpp_in[m]->original_length;
4071           bsbpp[m]->effective_length = bsbpp_in[m]->effective_length;
4072        }
4073     }
4074 
4075     return bsbpp;
4076 }
4077 
BlastFreeQueryDNAP(BlastSequenceBlkPtr PNTR bsbpp)4078 void LIBCALL BlastFreeQueryDNAP(BlastSequenceBlkPtr PNTR bsbpp)
4079 {
4080     Int4 m;
4081 
4082     for(m = 0; m < 2; m++) {
4083         BlastSequenceBlkDestruct(bsbpp[m]);
4084     }
4085 
4086     MemFree(bsbpp);
4087 
4088     return;
4089 }
4090 
4091 BlastSequenceBlkPtr PNTR LIBCALL
BlastCreateQueryDNAP(BlastSearchBlkPtr search,Int4 length)4092 BlastCreateQueryDNAP(BlastSearchBlkPtr search, Int4 length)
4093 {
4094 
4095     BlastSequenceBlkPtr PNTR bsbpp;
4096     Uint1Ptr dnap;
4097     Int4 i, j, k, m;
4098     Int4 shift;
4099     BLASTContextStructPtr context = search->context;
4100     Uint1 strand_option;
4101 
4102     if(context == NULL)
4103         return NULL;
4104 
4105     strand_option = search->last_context / CODON_LENGTH;
4106 
4107     bsbpp = MemNew(sizeof(BlastSequenceBlkPtr)*2);
4108 
4109     for(m = search->first_context/CODON_LENGTH;
4110         m <= search->last_context/CODON_LENGTH; m++) {
4111 
4112         bsbpp[m] = (BlastSequenceBlkPtr) MemNew(sizeof(BlastSequenceBlk));
4113 
4114         dnap = MemNew(length+3*CODON_LENGTH);
4115         /* dnap = MemNew(length + 1); */
4116 
4117         dnap[0]=dnap[1]=dnap[2] = NULLB;
4118 
4119         shift = m*CODON_LENGTH;
4120         for (i = 0, j = 0; i < length+1;) {
4121             for(k = shift; k < shift + CODON_LENGTH; k++) {
4122                 dnap[i] = context[k].query->sequence_start[j];
4123                 i++;
4124             }
4125             j++;
4126         }
4127         BlastSequenceAddSequence(bsbpp[m], dnap+3, dnap, length, length, 0);
4128     }
4129 
4130     return bsbpp;
4131 }
4132 
4133 
BLASTCalculateSearchSpace(BLAST_OptionsBlkPtr options,Int4 nseq,Int8 dblen,Int4 qlen)4134 FloatHi LIBCALL BLASTCalculateSearchSpace(BLAST_OptionsBlkPtr options,
4135         Int4 nseq, Int8 dblen, Int4 qlen)
4136 {
4137     Int4 length_adjustment, qlen_eff;
4138     Int8 dblen_eff;
4139     BLAST_KarlinBlkPtr kbp;
4140     FloatHi searchsp;
4141 
4142     if (options == NULL)
4143         return 0;
4144 
4145     kbp = BlastKarlinBlkCreate();
4146     BlastKarlinBlkGappedCalcEx(kbp, options->gap_open, options->gap_extend,
4147             options->decline_align, options->matrix, NULL);
4148 
4149     if (options->gapped_calculation ) {
4150         Nlm_FloatHi alpha, beta; /*alpha and beta for new scoring system */
4151         if (StringCmp(options->program_name, "blastn") != 0)
4152             getAlphaBeta(options->matrix,&alpha,&beta,options->gapped_calculation,
4153                      options->gap_open, options->gap_extend);
4154         else
4155             BlastKarlinGetNuclAlphaBeta(options->reward, options->penalty, options->gap_open,
4156                      options->gap_extend, kbp, options->gapped_calculation, &alpha, &beta);
4157 
4158         BlastComputeLengthAdjustment(kbp->K,
4159                                      kbp->logK, alpha/kbp->Lambda, beta,
4160                                      qlen,
4161                                      dblen, nseq,
4162                                      &length_adjustment );
4163     } else {
4164         BlastComputeLengthAdjustment(kbp->K, kbp->logK, 1/kbp->H, 0.0,
4165                                      qlen,
4166                                      dblen, nseq,
4167                                      &length_adjustment );
4168     }
4169 
4170     kbp = BlastKarlinBlkDestruct(kbp);
4171 
4172     qlen_eff   = qlen - length_adjustment;
4173     dblen_eff  = dblen - nseq*length_adjustment;
4174     searchsp   = ((Nlm_FloatHi) qlen_eff) * ((Nlm_FloatHi) dblen_eff);
4175 
4176     return searchsp;
4177 }
4178 
4179 #define DROPOFF_NUMBER_OF_BITS 10.0
4180 #define INDEX_THR_MIN_SIZE 20000
4181 #define DEFAULT_LONGEST_INTRON 122
4182 
BLASTSetUpSearchInternalByLoc(BlastSearchBlkPtr search,SeqLocPtr query_slp,BioseqPtr query_bsp,CharPtr prog_name,Int4 qlen,BLAST_OptionsBlkPtr options,int (LIBCALLBACK * callback)(Int4 done,Int4 positives))4183 Int2 LIBCALL BLASTSetUpSearchInternalByLoc (BlastSearchBlkPtr search, SeqLocPtr query_slp, BioseqPtr query_bsp, CharPtr prog_name, Int4 qlen, BLAST_OptionsBlkPtr options, int (LIBCALLBACK *callback)(Int4 done, Int4 positives))
4184 
4185 {
4186 	BioseqPtr bsp_temp, bsp;
4187 	Boolean mask_at_hash=FALSE, private_slp_delete;
4188 	Boolean query_is_na, db_is_na;
4189 	Char buffer[128];
4190 	Int2 retval = 0, status, last_index;
4191 	Int4 effective_query_length, query_length, full_query_length,
4192 		index, length, length_adjustment=0;
4193 	Int4 max_length, block_width;
4194 	Nlm_FloatHi avglen;
4195 	ReadDBFILEPtr rdfp;
4196 	SeqIdPtr query_id;
4197 	SeqPortPtr spp=NULL, spp_reverse=NULL;
4198 	SeqLocPtr filter_slp=NULL, private_slp=NULL, private_slp_rev=NULL, private_slp_double=NULL;
4199 	GeneticCodePtr gcp;
4200 	Uint1 residue, strand;
4201 	Uint1Ptr sequence;
4202 	Uint1Ptr query_seq, query_seq_start, query_seq_rev, query_seq_start_rev;
4203 	ValNodePtr vnp;
4204         Int4 query_loc_start;
4205 
4206 	/* AM: Temporaries to compute effective lengths of individual queries. */
4207 	IntArray lengths_eff=NULL;
4208 	IntArray length_adj_tmp=NULL;
4209 	Int4 le_iter, length_tmp;
4210 	Int4 i;
4211         BLAST_ScoreBlkPtr sbptmp = NULL; /* AM: query concatenation */
4212 
4213 	/* AM: To support individual masking in the case of query multiplexing. */
4214 	SeqLocPtr *concat_filter_slp=NULL, *concat_private_slp=NULL, *concat_private_slp_rev=NULL,
4215 	          * indiv_filter_slp=NULL, *indiv_private_slp=NULL, *indiv_private_slp_rev=NULL;
4216         SeqLocPtr ConcatLCaseMask;
4217 	Boolean * indiv_mask_at_hash=NULL;
4218 	QueriesPtr mult_queries = NULL;
4219 
4220 	if (options == NULL)
4221 	{
4222 	  	ErrPostEx(SEV_FATAL, 1, 0, "BLAST_OptionsBlkPtr is NULL\n");
4223 		return 1;
4224 	}
4225 
4226 	if (query_slp == NULL && query_bsp == NULL)
4227 	{
4228 	  	ErrPostEx(SEV_FATAL, 1, 0, "Query is NULL\n");
4229 		return 1;
4230 	}
4231 
4232         /* AM: Support for query multiplexing. */
4233 	mult_queries = search->mult_queries;
4234 
4235         if( mult_queries )
4236 	{
4237 	  concat_filter_slp
4238 	    = (SeqLocPtr *)MemNew( mult_queries->NumQueries*sizeof( SeqLocPtr ) );
4239 	  indiv_filter_slp
4240 	    = (SeqLocPtr *)MemNew( mult_queries->NumQueries*sizeof( SeqLocPtr ) );
4241 	  concat_private_slp
4242 	    = (SeqLocPtr *)MemNew( mult_queries->NumQueries*sizeof( SeqLocPtr ) );
4243 	  concat_private_slp_rev
4244 	    = (SeqLocPtr *)MemNew( mult_queries->NumQueries*sizeof( SeqLocPtr ) );
4245 	  indiv_private_slp
4246 	    = (SeqLocPtr *)MemNew( mult_queries->NumQueries*sizeof( SeqLocPtr ) );
4247 	  indiv_private_slp_rev
4248 	    = (SeqLocPtr *)MemNew( mult_queries->NumQueries*sizeof( SeqLocPtr ) );
4249           indiv_mask_at_hash
4250 	    = (Boolean *)MemNew( mult_queries->NumQueries*sizeof( Boolean ) );
4251 	}
4252 
4253 	query_seq = NULL;	/* Gets rid of warning. */
4254 	query_seq_rev = NULL;	/* Gets rid of warning. */
4255 	query_seq_start = NULL;	/* Gets rid of warning. */
4256 	query_seq_start_rev = NULL;	/* Gets rid of warning. */
4257 
4258         /* These parameters are used by translated RPS Blast */
4259         search->pbp->filter_string = StringSave(options->filter_string);
4260         search->pbp->is_rps_blast = options->is_rps_blast;
4261 
4262         /* Restrict lower case mask to the query interval, if it is
4263            not a whole Bioseq. */
4264         if (query_slp) {
4265            options->query_lcase_mask =
4266               AdjustOffsetsInMaskLoc(options->query_lcase_mask,
4267                                      SeqLocStart(query_slp),
4268                                      SeqLocStop(query_slp));
4269         }
4270         search->pbp->query_lcase_mask = options->query_lcase_mask;
4271         search->pbp->is_ooframe = options->is_ooframe;
4272         search->pbp->shift_pen = options->shift_pen;
4273 
4274 	if (query_slp)
4275 	{
4276                 query_loc_start = SeqLocStart(query_slp);
4277 		strand = SeqLocStrand(query_slp);
4278 		if (strand == Seq_strand_unknown || strand == Seq_strand_plus || strand == Seq_strand_both)
4279 		{
4280 			private_slp = SeqLocIntNew(query_loc_start, SeqLocStop(query_slp), Seq_strand_plus, SeqLocId(query_slp));
4281 
4282                   /* AM: Support for query multiplexing. */
4283 		  if( mult_queries )
4284 		    for( i = 0; i < mult_queries->NumQueries; ++i )
4285 		    {
4286 		      indiv_private_slp[i]
4287 		        = SeqLocIntNew( 0,
4288 			                mult_queries->QueryEnds[i] - mult_queries->QueryStarts[i],
4289 					Seq_strand_plus,
4290 					mult_queries->FakeBsps[i]->id );
4291                       concat_private_slp[i]
4292 		        = SeqLocIntNew( mult_queries->QueryStarts[i],
4293 			                mult_queries->QueryEnds[i],
4294 					Seq_strand_plus,
4295 					SeqLocId( query_slp ) );
4296                     }
4297 		}
4298 		if (strand == Seq_strand_minus || strand == Seq_strand_both)
4299 		{
4300 			private_slp_rev = SeqLocIntNew(query_loc_start, SeqLocStop(query_slp), Seq_strand_minus, SeqLocId(query_slp));
4301 
4302                   /* AM: Support for query multiplexing. */
4303 		  if( mult_queries )
4304 		    for( i = 0; i < mult_queries->NumQueries; ++i )
4305 		    {
4306 		      indiv_private_slp_rev[i]
4307 		        = SeqLocIntNew( 0,
4308 			                mult_queries->QueryEnds[i] - mult_queries->QueryStarts[i],
4309 					Seq_strand_minus,
4310 					mult_queries->FakeBsps[i]->id );
4311                       concat_private_slp_rev[i]
4312 		        = SeqLocIntNew( mult_queries->QueryStarts[i],
4313 			                mult_queries->QueryEnds[i],
4314 					Seq_strand_minus,
4315 					SeqLocId( query_slp ) );
4316                     }
4317 		}
4318 		private_slp_delete = TRUE;
4319 	   	if (search->prog_number==blast_type_blastn)
4320 	      		search = BlastFillQueryOffsets(search, query_slp, 1);
4321 
4322 	}
4323 	else
4324 	{
4325 		private_slp = SeqLocIntNew(0, query_bsp->length-1 , Seq_strand_plus, SeqIdFindBest(query_bsp->id, SEQID_GI));
4326 		private_slp_rev = SeqLocIntNew(0, query_bsp->length-1 , Seq_strand_minus, SeqIdFindBest(query_bsp->id, SEQID_GI));
4327 		private_slp_delete = FALSE;
4328 
4329 		private_slp_double = SeqLocIntNew(0, query_bsp->length-1 , Seq_strand_both, SeqIdFindBest(query_bsp->id, SEQID_GI));
4330 	   	if (search->prog_number==blast_type_blastn)
4331 	      		search = BlastFillQueryOffsets(search,
4332                                                        private_slp_double, 1);
4333 		SeqLocFree(private_slp_double);
4334 	}
4335 
4336 	query_length = 0;
4337 	if (private_slp)
4338 		query_length = SeqLocLen(private_slp);
4339 	else if (private_slp_rev)
4340 		query_length = SeqLocLen(private_slp_rev);
4341 	if (query_length == 0)
4342 	{
4343 		sprintf(buffer, "No valid query sequence");
4344 		BlastConstructErrorMessage("Blast", buffer, 2,
4345                                            &(search->error_return));
4346                 retval = 1;
4347 		goto BlastSetUpReturn;
4348 	}
4349 
4350 	bsp = NULL;
4351         if (private_slp)
4352            bsp = BioseqLockById(SeqLocId(private_slp));
4353         else if (private_slp_rev)
4354            bsp = BioseqLockById(SeqLocId(private_slp_rev));
4355 
4356         if (bsp == NULL) {
4357            ErrPostEx(SEV_WARNING, 0, 0, "No valid query sequence, BioseqLockById returned NULL\n");
4358            retval = 1;
4359            goto BlastSetUpReturn;
4360         }
4361         full_query_length = bsp->length;
4362 
4363         BlastGetTypes(prog_name, &query_is_na, &db_is_na);
4364         if (query_is_na != ISA_na(bsp->mol))	{
4365            ErrPostEx(SEV_WARNING, 0, 0, "Query molecule is incompatible with %s program", prog_name);
4366            BioseqUnlock(bsp);
4367            retval = 1;
4368            goto BlastSetUpReturn;
4369         }
4370 	if (bsp && bsp->repr == Seq_repr_virtual) {
4371            BioseqUnlock(bsp);
4372            ErrPostEx(SEV_WARNING, 0, 0, "Virtual sequence detected\n");
4373            retval = 1;
4374            goto BlastSetUpReturn;
4375 	}
4376 	BioseqUnlock(bsp);
4377 
4378 	if (query_slp)
4379 	{
4380 		search->query_slp = query_slp;
4381 	}
4382 	else
4383 	{
4384 		search->query_slp = private_slp;
4385 		search->allocated += BLAST_SEARCH_ALLOC_QUERY_SLP;
4386 	}
4387 
4388 
4389 	search->translation_buffer = NULL;
4390 	search->translation_buffer_size = 0;
4391 
4392 	/*
4393 	Get genetic codes (should be determined from BLAST_OptionsBlkPtr.
4394 	Only needed for blastx, tblast[nx]
4395 	*/
4396 	if (StringCmp(prog_name, "blastp") != 0 && StringCmp(prog_name, "blastn") != 0)
4397 	{
4398 
4399                 if (StringCmp(prog_name, "tblastx") == 0
4400 		    || StringCmp(prog_name, "tblastn") == 0
4401 		    ||StringCmp(prog_name, "psitblastn") == 0)
4402 
4403 		{
4404 			gcp = GeneticCodeFind(options->db_genetic_code, NULL);
4405 			for (vnp = (ValNodePtr)gcp->data.ptrvalue; vnp != NULL; vnp = vnp->next)
4406 			{
4407 				if (vnp->choice == 3)	/* ncbieaa */
4408 				{
4409 					search->db_genetic_code = (CharPtr)vnp->data.ptrvalue;
4410 					break;
4411 				}
4412 			}
4413 			search->translation_table = GetPrivatTranslationTable(search->db_genetic_code, FALSE);
4414 			search->translation_table_rc = GetPrivatTranslationTable(search->db_genetic_code, TRUE);
4415 			max_length = 0;
4416 			rdfp = search->rdfp;
4417 			while (rdfp)
4418 			{
4419 				max_length = MAX(max_length, readdb_get_maxlen(rdfp));
4420 				rdfp = rdfp->next;
4421 			}
4422 			search->translation_buffer = MemNew((3+(max_length/3))*sizeof(Uint1));
4423 			search->translation_buffer_size = 1+(max_length/3);
4424 			search->allocated += BLAST_SEARCH_ALLOC_TRANS_INFO;
4425 		}
4426 
4427 		if (StringCmp(prog_name, "blastx") == 0 || StringCmp(prog_name, "tblastx") == 0)
4428 		{
4429 			gcp = GeneticCodeFind(options->genetic_code, NULL);
4430 			for (vnp = (ValNodePtr)gcp->data.ptrvalue; vnp != NULL; vnp = vnp->next)
4431 			{
4432 				if (vnp->choice == 3)	/* ncbieaa */
4433 				{
4434 					search->genetic_code = (CharPtr)vnp->data.ptrvalue;
4435 					break;
4436 				}
4437 			}
4438 		}
4439 	}
4440 
4441 	if (options->filter && !options->filter_string)
4442 		options->filter_string = BlastConstructFilterString(options->filter);
4443 
4444 	/* If the query is translated do this below. */
4445 	if (StringCmp(prog_name, "blastx") &&
4446             StringCmp(prog_name, "tblastx")) {
4447                                 /* Futamura */
4448             if(!(search->pbp->is_rps_blast &&
4449                                   (!StringCmp(prog_name, "tblastn")||
4450                                    !StringCmp(prog_name, "psitblastn")))) {
4451 		/* AM: Query multiplexing. */
4452                 if( !mult_queries )
4453 		{
4454                   if (private_slp)
4455                       filter_slp = BlastSeqLocFilterEx(private_slp, options->filter_string, &mask_at_hash);
4456                   else if (private_slp_rev)
4457                       filter_slp = BlastSeqLocFilterEx(private_slp_rev, options->filter_string, &mask_at_hash);
4458 
4459                   /* If lower case characters were detected in the input
4460                      their locations will be masked out */
4461 
4462                   if(search->pbp->query_lcase_mask != NULL) {
4463                       filter_slp = blastMergeFilterLocs(filter_slp, search->pbp->query_lcase_mask, FALSE, 0, 0);
4464                   }
4465                 }
4466 		else
4467 		  for( i = 0; i < mult_queries->NumQueries; ++i )
4468 		  {
4469 		    if( indiv_private_slp[i] )
4470 		    {
4471 		      indiv_filter_slp[i]
4472 		        = BlastSeqLocFilterEx( indiv_private_slp[i],
4473 			                       options->filter_string,
4474 					       indiv_mask_at_hash + i );
4475                       concat_filter_slp[i]
4476 		        = BlastSeqLocFilterEx( concat_private_slp[i],
4477 			                       options->filter_string,
4478 					       indiv_mask_at_hash + i );
4479                     }
4480 		    else if( indiv_private_slp_rev[i] )
4481 		    {
4482 		      indiv_filter_slp[i]
4483 		        = BlastSeqLocFilterEx( indiv_private_slp_rev[i],
4484 			                       options->filter_string,
4485 					       indiv_mask_at_hash + i );
4486                       concat_filter_slp[i]
4487 		        = BlastSeqLocFilterEx( concat_private_slp_rev[i],
4488 			                       options->filter_string,
4489 					       indiv_mask_at_hash + i );
4490                     }
4491 
4492                     if( mult_queries->LCaseMasks && mult_queries->LCaseMasks[i] )
4493 		    {
4494 		      indiv_filter_slp[i] = blastMergeFilterLocs( indiv_filter_slp[i],
4495 		                                                  (SeqLocPtr)mult_queries->LCaseMasks[i]->data.ptrvalue,
4496 								  FALSE, 0, 0 );
4497                       ConcatLCaseMask = ConcatSeqLoc( mult_queries, mult_queries->LCaseMasks[i],
4498 		                                      SeqLocId( query_slp ), i );
4499                       concat_filter_slp[i] = blastMergeFilterLocs( concat_filter_slp[i],
4500 		                                                   (SeqLocPtr)ConcatLCaseMask->data.ptrvalue,
4501 								   FALSE, 0, 0 );
4502                     }
4503                   }
4504             }
4505         }
4506 
4507         if( mult_queries ) { /* AM: query concatenation: free resources */
4508             for( i = 0; i < mult_queries->NumQueries; ++i ) {
4509                 SeqLocFree( indiv_private_slp_rev[i] );
4510                 SeqLocFree( indiv_private_slp[i] );
4511                 SeqLocFree( concat_private_slp_rev[i] );
4512                 SeqLocFree( concat_private_slp[i] );
4513             }
4514 
4515             MemFree( indiv_private_slp_rev );
4516             MemFree( indiv_private_slp );
4517             MemFree( concat_private_slp_rev );
4518             MemFree( concat_private_slp );
4519         }
4520 
4521 	/*
4522            Dusting of query sequence. Only needed for blastn, optional
4523         */
4524 
4525         if(StringCmp(prog_name, "blastn") == 0) {
4526 	  /* AM: Changed to support query multiplexing. */
4527 	  if( !mult_queries )
4528 		if (filter_slp && !mask_at_hash)
4529 			ValNodeAddPointer(&(search->mask), SEQLOC_MASKING_NOTSET, filter_slp);
4530 		else
4531 			ValNodeAddPointer(&(search->mask1), SEQLOC_MASKING_NOTSET, filter_slp);
4532           else
4533 	    for( i = 0; i < mult_queries->NumQueries; ++i )
4534 	      if( !indiv_mask_at_hash[i] )
4535 	        ValNodeAddPointer( &(search->mask), SEQLOC_MASKING_NOTSET, indiv_filter_slp[i] );
4536               else
4537 	        ValNodeAddPointer( &(search->mask1), SEQLOC_MASKING_NOTSET, indiv_filter_slp[i] );
4538         }
4539 
4540 
4541         if (StringCmp(prog_name, "blastp") == 0
4542             || StringCmp(prog_name, "tblastn") == 0
4543             || StringCmp(prog_name, "psitblastn") == 0)
4544 	{
4545 		spp = SeqPortNewByLoc(private_slp, Seq_code_ncbistdaa);
4546                 SeqPortSet_do_virtual(spp, TRUE);
4547 
4548 		/* AM: Changed to support query multiplexing. */
4549 		if( !mult_queries )
4550 		  if (filter_slp && !mask_at_hash)
4551 			ValNodeAddPointer(&(search->mask), SEQLOC_MASKING_NOTSET, filter_slp);
4552 		  else
4553 			ValNodeAddPointer(&(search->mask1), SEQLOC_MASKING_NOTSET, filter_slp);
4554                 else
4555 		  for( i = 0; i < mult_queries->NumQueries; ++i )
4556 		    if( !indiv_mask_at_hash[i] )
4557 		      ValNodeAddPointer( &(search->mask), SEQLOC_MASKING_NOTSET, indiv_filter_slp[i] );
4558                     else
4559 		      ValNodeAddPointer( &(search->mask1), SEQLOC_MASKING_NOTSET, indiv_filter_slp[i] );
4560 	}
4561 	else if (StringCmp(prog_name, "blastx") == 0 || StringCmp(prog_name, "tblastx") == 0 || StringCmp(prog_name, "blastn") == 0)
4562 	{
4563 		if (private_slp)
4564 		{
4565 			spp = SeqPortNewByLoc(private_slp, Seq_code_ncbi4na);
4566                 	SeqPortSet_do_virtual(spp, TRUE);
4567 		}
4568 		if (private_slp_rev)
4569 		{
4570 			spp_reverse = SeqPortNewByLoc(private_slp_rev, Seq_code_ncbi4na);
4571                 	SeqPortSet_do_virtual(spp_reverse, TRUE);
4572 		}
4573 	}
4574 	else
4575 	{
4576 	  	ErrPostEx(SEV_FATAL, 1, 0, "Only blastn, blastp, blastx, tblastn tblastx is allowed\n");
4577                 retval = 1;
4578 		goto BlastSetUpReturn;
4579 	}
4580 
4581         /* AM: query concatenation: free resources */
4582         MemFree( indiv_mask_at_hash );
4583         MemFree( indiv_filter_slp );
4584 
4585 	if (spp)
4586 	{
4587 		query_seq_start = (Uint1Ptr) MemNew(2*((query_length)+2)*sizeof(Char));
4588 		query_seq_start[0] = NULLB;
4589 		query_seq = query_seq_start+1;
4590 		index=0;
4591 		while ((residue=SeqPortGetResidue(spp)) != SEQPORT_EOF)
4592 		{
4593 
4594 			if (IS_residue(residue))
4595 			{
4596 				if (residue == 24) /* 24 is Selenocysteine. */
4597 				{
4598 					residue = 21; /* change Selenocysteine to X. */
4599 					sprintf(buffer, "Selenocysteine (U) at position %ld replaced by X",
4600 						(long) index+1);
4601 					BlastConstructErrorMessage("Blast", buffer, 1, &(search->error_return));
4602 				}
4603 				query_seq[index] = residue;
4604 				index++;
4605 			}
4606 		}
4607 		query_seq[index] = NULLB;
4608 		spp = SeqPortFree(spp);
4609 		if (StringCmp(prog_name, "blastn") == 0)
4610 		{
4611 			if (filter_slp)
4612 			{
4613 				if (mask_at_hash)
4614                 			search->context[0].location =
4615                         			BlastSeqLocFillDoubleIntEx(filter_slp, full_query_length, query_length, FALSE, SeqLocStart(private_slp));
4616 				else
4617 					BlastMaskTheResidues(query_seq, full_query_length, 15, filter_slp, FALSE, SeqLocStart(private_slp));
4618 			}
4619 
4620 			/* AM: query multiplexing. */
4621 			if( mult_queries )
4622                           for( i = 0; i < mult_queries->NumQueries; ++i )
4623 			    if( concat_filter_slp[i] )
4624 			      BlastMaskTheResidues( query_seq,
4625 					            full_query_length,
4626 					            15, concat_filter_slp[i], FALSE,
4627 					            SeqLocStart( private_slp ) );
4628 
4629 			for (index=0; index<=query_length+1; index++)
4630 				query_seq_start[index] = ncbi4na_to_blastna[query_seq_start[index]];
4631 		}
4632 	}
4633 
4634 	if (spp_reverse)
4635 	{
4636 		query_seq_start_rev = (Uint1Ptr) MemNew(((query_length)+2)*sizeof(Char));
4637 		query_seq_start_rev[0] = NULLB;
4638 		query_seq_rev = query_seq_start_rev+1;
4639 		index=0;
4640 		while ((residue=SeqPortGetResidue(spp_reverse)) != SEQPORT_EOF)
4641 		{
4642 			if (IS_residue(residue))
4643 			{
4644 				if (residue == 24) /* 24 is Selenocysteine. */
4645 				{
4646 					residue = 21; /* change Selenocysteine to X. */
4647 					sprintf(buffer, "Selenocysteine (U) at position %ld replaced by X",
4648 						(long) index+1);
4649 					BlastConstructErrorMessage("Blast", buffer, 1, &(search->error_return));
4650 				}
4651 				query_seq_rev[index] = residue;
4652 				index++;
4653 			}
4654 		}
4655 		query_seq_rev[index] = NULLB;
4656 		spp_reverse = SeqPortFree(spp_reverse);
4657 		if (StringCmp(prog_name, "blastn") == 0)
4658 		{
4659 			if (filter_slp)
4660 			{
4661 			   if (mask_at_hash)
4662 			     search->context[0].location =
4663 				BlastSeqLocFillDoubleIntRev(search->context[0].location, filter_slp, query_length, full_query_length, full_query_length - SeqLocStop(private_slp_rev) - 1);
4664 			   else
4665 			      BlastMaskTheResidues(query_seq_rev, full_query_length, 15, filter_slp, TRUE, full_query_length - SeqLocStop(private_slp_rev) - 1);
4666 			}
4667 
4668 			/* AM: query multiplexing. */
4669 			if( mult_queries )
4670 			  for( i = 0; i < mult_queries->NumQueries; ++i )
4671 			    if( concat_filter_slp[i] )
4672 			      BlastMaskTheResidues( query_seq_rev,
4673 				                    full_query_length,
4674                                                     15, concat_filter_slp[i], TRUE,
4675 				                    full_query_length
4676 				                      - SeqLocStop( private_slp_rev ) - 1 );
4677 
4678 			for (index=0; index<=query_length+1; index++)
4679 				query_seq_start_rev[index] =
4680 				   ncbi4na_to_blastna[query_seq_start_rev[index]];
4681 			if (query_seq_start)
4682 			   MemCpy(query_seq_start+query_length+1,
4683 				  query_seq_start_rev,query_length+2);
4684 		}
4685 	}
4686 
4687 /*
4688 	Set the context_factor, which specifies how many different
4689 	ways the query or db is examined (e.g., blastn looks at both
4690 	stands of query, context_factor is 2).
4691 */
4692 	if (StringCmp(prog_name, "blastp") == 0)
4693 	{
4694 		search->context_factor = 1;
4695 		length = query_length;
4696 	}
4697 	else if (StringCmp(prog_name, "blastn") == 0)
4698 	{	/* two strands concatenated in one sequence */
4699 		search->context_factor = 1;
4700 		length = query_length;
4701 	}
4702 	else if (StringCmp(prog_name, "blastx") == 0)
4703 	{	/* query translated in six frames. */
4704 		search->context_factor = search->last_context-search->first_context+1;
4705 		length = query_length/3;
4706 	}
4707         else if ( (StringCmp(prog_name, "tblastn") == 0)
4708                   || (StringCmp(prog_name, "psitblastn") == 0))
4709 	{	/* db translated in six frames. */
4710 		search->context_factor = 6;
4711 		length = query_length;
4712 	}
4713 	else if (StringCmp(prog_name, "tblastx") == 0)
4714 	{	/* db and query each translated in six frames. */
4715 		search->context_factor = 6*CODON_LENGTH*(search->last_context-search->first_context+1);
4716 		length = query_length/3;
4717 	}
4718 	else
4719 	{
4720 		sprintf(buffer, "%s is not a valid program name", prog_name);
4721 		BlastConstructErrorMessage("BLASTSetUpSearch", buffer, 2, &(search->error_return));
4722                 retval = 1;
4723 		goto BlastSetUpReturn;
4724 	}
4725 
4726 	if (private_slp)
4727 		query_id = SeqIdFindBest(SeqLocId(private_slp), SEQID_GI);
4728 	else
4729 		query_id = SeqIdFindBest(SeqLocId(private_slp_rev), SEQID_GI);
4730 
4731 	search->query_id = SeqIdDup(query_id);
4732 
4733 /* Store the query sequence, or the translation thereof. */
4734         if (StringCmp(prog_name, "blastp") == 0
4735             || StringCmp(prog_name, "tblastn") == 0
4736             || StringCmp(prog_name, "psitblastn") == 0)
4737 	{	/* One blastp context for now. */
4738 		if (filter_slp)
4739 		{
4740 			if (mask_at_hash)
4741                 		search->context[0].location =
4742                         		BlastSeqLocFillDoubleInt(filter_slp, query_length, FALSE);
4743 			else
4744 				BlastMaskTheResidues(query_seq, full_query_length, 21, filter_slp, FALSE, SeqLocStart(private_slp));
4745 		}
4746 
4747 	        /* AM: query multiplexing. */
4748 		if( mult_queries )
4749 		  for( i = 0; i < mult_queries->NumQueries; ++i )
4750 		    if( concat_filter_slp[i] )
4751 		      BlastMaskTheResidues( query_seq, full_query_length,
4752 		                            21, concat_filter_slp[i], FALSE,
4753 					    SeqLocStart( private_slp ) );
4754 
4755 		BlastSequenceAddSequence(search->context[0].query, NULL, query_seq_start, query_length, query_length, 0);
4756 	}
4757 	else if (StringCmp(prog_name, "blastx") == 0  || StringCmp(prog_name, "tblastx") == 0)
4758 	{
4759 
4760 		for (index=search->first_context; index<=search->last_context; index++)
4761 		{
4762 		   if (search->context[index].query->frame > 0)
4763 		   {
4764 			sequence = GetTranslation(query_seq, query_length, search->context[index].query->frame, &length, search->genetic_code);
4765 		   }
4766 		   else
4767 		   {
4768 			sequence = GetTranslation(query_seq_rev, query_length, search->context[index].query->frame, &length, search->genetic_code);
4769 		   }
4770 		   if (options->filter_string && length > 0)
4771 		   {
4772 		  	bsp_temp = BlastMakeTempProteinBioseq(sequence+1, length, Seq_code_ncbistdaa);
4773 
4774 			filter_slp = BlastBioseqFilterEx(bsp_temp, options->filter_string, &mask_at_hash);
4775 			HackSeqLocId(filter_slp, search->query_id);
4776 
4777                         /* If FASTA filtering is set - updating this SeqLoc */
4778                         if(search->pbp->query_lcase_mask != NULL) {
4779                             filter_slp = blastMergeFilterLocs(filter_slp, search->pbp->query_lcase_mask, TRUE, search->context[index].query->frame, query_length);
4780                         }
4781 
4782 			/* SeqMgrDeleteFromBioseqIndex(bsp_temp); */
4783 
4784 			/* bsp_temp->id = SeqIdSetFree(bsp_temp->id); */
4785 
4786 			bsp_temp = BioseqFree(bsp_temp);
4787 			if (mask_at_hash)
4788 			{
4789                 		search->context[index].location =
4790 					BlastSeqLocFillDoubleInt(filter_slp, query_length, FALSE);
4791 			}
4792 			else
4793 			{
4794 				BlastMaskTheResidues(sequence+1, length, 21, filter_slp, FALSE, 0);
4795 				BlastConvertProteinSeqLoc(filter_slp, search->context[index].query->frame, query_length);
4796 			}
4797 			if (filter_slp && !mask_at_hash)
4798 				ValNodeAddPointer(&(search->mask), FrameToDefine(search->context[index].query->frame), filter_slp);
4799 			else
4800 				ValNodeAddPointer(&(search->mask1), FrameToDefine(search->context[index].query->frame), filter_slp);
4801 		   }
4802 		   BlastSequenceAddSequence(search->context[index].query, NULL, sequence, length, query_length, 0);
4803 		}
4804 		query_seq_start = MemFree(query_seq_start);
4805 		query_seq_start_rev = MemFree(query_seq_start_rev);
4806 
4807                 if(search->pbp->is_ooframe) {
4808                     search->query_dnap = BlastCreateQueryDNAP(search, query_length);
4809                 }
4810 	} else if (StringCmp(prog_name, "blastn") == 0) {
4811 	   if (search->last_context - search->first_context > 0) {
4812 	      /* Both strands are searched */
4813 	      BlastSequenceAddSequence(search->context[search->first_context].query, NULL, query_seq_start, 2*query_length+2, 2*query_length+2, 0);
4814 	      BlastSequenceAddSequence(search->context[search->last_context].query, NULL,
4815 				       query_seq_start_rev, query_length,
4816 				       query_length, 0);
4817 	   } else if (search->first_context==0)
4818 	      /* Only first strand is searched */
4819 	      BlastSequenceAddSequence(search->context[search->first_context].query, NULL, query_seq_start, query_length+1, query_length+1, 0);
4820 	   else {/* Only second strand is searched */
4821 	      BlastSequenceAddSequence(search->context[search->first_context].query, NULL,
4822 				       query_seq_start_rev, query_length+1,
4823 				       query_length+1, 0);
4824 	   }
4825 	}
4826 
4827         if( mult_queries ) { /* AM: query concatenation: free resources */
4828             for( i = 0; i < mult_queries->NumQueries; ++i )
4829                 SeqLocFree( concat_filter_slp[i] );
4830 
4831             MemFree( concat_filter_slp );
4832         }
4833 
4834 	if (mask_at_hash)
4835 	{ /* No longer needed. */
4836 /*
4837 		filter_slp = SeqLocSetFree(filter_slp);
4838 */
4839 	}
4840 
4841 /* Set the ambiguous residue before the ScoreBlk is filled. */
4842 	if (StringCmp(prog_name, "blastn") != 0)
4843 	{
4844 		search->sbp->read_in_matrix = TRUE;
4845 		BlastScoreSetAmbigRes(search->sbp, 'X');
4846 	}
4847 	else
4848 	{
4849   	        if(options->matrix!=NULL && *(options->matrix) != NULLB) {
4850 		     search->sbp->read_in_matrix = TRUE;
4851 	        } else {
4852 		     search->sbp->read_in_matrix = FALSE;
4853 	        }
4854 		BlastScoreSetAmbigRes(search->sbp, 'N');
4855 	}
4856 
4857 
4858 	search->sbp->penalty = options->penalty;
4859 	search->sbp->reward = options->reward;
4860 
4861         /* option is to use alignments chosen by user in PSM computation API (used in WWW PSI-Blast); */
4862 	search->pbp->use_best_align = options->use_best_align;
4863 
4864 
4865 	/* Should culling be used at all? */
4866 	search->pbp->perform_culling = options->perform_culling;
4867 	search->pbp->hsp_range_max = options->hsp_range_max;
4868         /* This assures that search->pbp->max_pieces is at least one wide. */
4869         block_width = MIN(query_length, options->block_width);
4870         if (block_width > 0)
4871            search->pbp->max_pieces = query_length/block_width;
4872 
4873 	search->sbp->query_length = query_length;
4874 
4875 	search->result_struct = BLASTResultsStructNew(search->result_size,
4876 						      search->pbp->max_pieces,
4877 						      search->pbp->hsp_range_max);
4878 
4879 	if (options->matrix != NULL)
4880 		status = BlastScoreBlkMatFill(search->sbp, options->matrix);
4881 	else
4882 		status = BlastScoreBlkMatFill(search->sbp, "BLOSUM62");
4883 
4884 	if (status != 0)
4885 	{
4886 		ErrPostEx(SEV_WARNING, 0, 0, "BlastScoreBlkMatFill returned non-zero status");
4887                 retval = 1;
4888                 goto BlastSetUpReturn;
4889 	}
4890 
4891 	/* This is used right below. */
4892 	search->pbp->gapped_calculation = options->gapped_calculation;
4893 	search->pbp->do_not_reevaluate = options->do_not_reevaluate;
4894 
4895     /* Set up sum statistics */
4896     search->pbp->do_sum_stats = options->do_sum_stats;
4897     if(search->prog_number == blast_type_blastx  ||
4898        search->prog_number == blast_type_tblastn ||
4899        search->prog_number == blast_type_psitblastn)
4900     {
4901         /* The program may use new_link_hsps to evaluate sum
4902            statistics. */
4903         Int4 max_protein_gap; /* the largest gap permitted in the
4904                                * translated sequence */
4905 
4906         max_protein_gap = (options->longest_intron - 2)/3;
4907         if(search->pbp->gapped_calculation) {
4908             if(options->longest_intron == 0) {
4909                 /* a zero value of longest_intron
4910                  * invokes the default behavior, which for gapped
4911                  * calculation is to set longest_intron to a
4912                  * predefined value. */
4913                 search->pbp->longest_intron = (DEFAULT_LONGEST_INTRON - 2) / 3;
4914             } else if(max_protein_gap <= 0) {
4915                 /* A nonpositive value of max_protein_gap turns linking off */
4916                 search->pbp->do_sum_stats = FALSE;
4917                 search->pbp->longest_intron = 0;
4918             } else { /* the value of max_protein_gap is positive */
4919                 search->pbp->longest_intron = max_protein_gap;
4920             }
4921         } else { /* This is an ungapped calculation. */
4922             /* For ungapped calculations, we preserve the old behavior
4923              * of the longest_intron parameter to maintain
4924              * backward-compatibility with older versions of BLAST. */
4925             search->pbp->longest_intron = MAX(max_protein_gap, 0);
4926         }
4927     }
4928 	search->pbp->first_db_seq = options->first_db_seq;
4929 	search->pbp->final_db_seq = options->final_db_seq;
4930 
4931 	retval = 0;
4932 	for (index=search->first_context; index<=search->last_context; index++)
4933 	{
4934 	   /* AM: Changed to support query multiplexing. */
4935 	   if (search->prog_number != blast_type_blastn ||
4936 	       index>search->first_context ||
4937 	       search->last_context==search->first_context)
4938            {
4939 	     if( search->prog_number == blast_type_tblastn
4940 	         && search->mult_queries )
4941              {
4942 	       for( i = 0; i < search->mult_queries->NumQueries; ++i )
4943 	       {
4944                    sbptmp = BLAST_ScoreBlkNew(
4945                            Seq_code_ncbistdaa, search->last_context + 1 );
4946                    sbptmp->read_in_matrix = TRUE;
4947 		   BlastScoreSetAmbigRes( sbptmp, 'X' );
4948 	           sbptmp->penalty = options->penalty;
4949 	           sbptmp->reward = options->reward;
4950                    sbptmp->query_length = query_length;
4951 
4952 	           if (options->matrix != NULL)
4953 		           status = BlastScoreBlkMatFill(sbptmp, options->matrix);
4954 	           else
4955 		           status = BlastScoreBlkMatFill(sbptmp, "BLOSUM62");
4956 
4957 	         status = BlastScoreBlkFill(
4958                          sbptmp,
4959 		   ((CharPtr)search->context[index].query->sequence)
4960 		     + search->mult_queries->QueryStarts[i],
4961                    search->mult_queries->QueryEnds[i]
4962 		     - search->mult_queries->QueryStarts[i] + 1,
4963                    index );
4964 
4965                  if( status ) break;
4966 
4967                  search->mult_queries->lambda_array[i]
4968 		   = sbptmp->kbp_std[search->first_context]->Lambda;
4969 
4970 		 if( i )
4971 		 {
4972 		   if( search->mult_queries->LambdaMin
4973 		       > sbptmp->kbp_std[search->first_context]->Lambda )
4974                      search->mult_queries->LambdaMin
4975 		       = sbptmp->kbp_std[search->first_context]->Lambda;
4976 
4977 		   if( search->mult_queries->LambdaMax
4978 		       < sbptmp->kbp_std[search->first_context]->Lambda )
4979                      search->mult_queries->LambdaMax
4980 		       = sbptmp->kbp_std[search->first_context]->Lambda;
4981 
4982 		   if( search->mult_queries->LogKMin
4983 		       > sbptmp->kbp_std[search->first_context]->logK )
4984                      search->mult_queries->LogKMin
4985 		       = sbptmp->kbp_std[search->first_context]->logK;
4986 
4987 		   if( search->mult_queries->LogKMax
4988 		       < sbptmp->kbp_std[search->first_context]->logK )
4989                      search->mult_queries->LogKMax
4990 		       = sbptmp->kbp_std[search->first_context]->logK;
4991 		 }
4992 		 else
4993 		 {
4994 		   search->mult_queries->LambdaMin
4995 		     = search->mult_queries->LambdaMax
4996 		     = sbptmp->kbp_std[search->first_context]->Lambda;
4997                    search->mult_queries->LogKMin
4998 		     = search->mult_queries->LogKMax
4999 		     = sbptmp->kbp_std[search->first_context]->logK;
5000 		 }
5001 
5002                  sbptmp = BLAST_ScoreBlkDestruct( sbptmp );
5003 	       }
5004              }
5005 
5006 	     status
5007 	       = BlastScoreBlkFill(search->sbp, (CharPtr)
5008 				   search->context[index].query->sequence,
5009 				   search->context[index].query->length,
5010 				   index);
5011            }
5012 	   else
5013 	   {
5014 	     status
5015 	       = BlastScoreBlkFill(search->sbp, (CharPtr)
5016 			    	   search->context[index].query->sequence,
5017 				   search->context[index+1].query->length,
5018 				   index);
5019            }
5020 
5021 		if (status != 0)
5022 		{
5023 			sprintf(buffer, "Unable to calculate Karlin-Altschul params, check query sequence");
5024 			BlastConstructErrorMessage("BLASTSetUpSearch", buffer, 2, &(search->error_return));
5025 			retval = 1;
5026 		}
5027 		if (search->pbp->gapped_calculation)
5028                 {
5029                     if (StringCmp(search->prog_name, "blastn") != 0)
5030 		    {
5031 			search->sbp->kbp_gap_std[index] = BlastKarlinBlkCreate();
5032                 	status = BlastKarlinBlkGappedCalcEx(search->sbp->kbp_gap_std[index], options->gap_open, options->gap_extend, options->decline_align, search->sbp->name, &(search->error_return));
5033 			if (status != 0)
5034 			{
5035 				retval = 1;
5036 			}
5037 			search->sbp->kbp_gap_psi[index] = BlastKarlinBlkCreate();
5038                 	status = BlastKarlinBlkGappedCalcEx(search->sbp->kbp_gap_psi[index], options->gap_open, options->gap_extend, options->decline_align, search->sbp->name, &(search->error_return));
5039 			if (status != 0)
5040 			{
5041 				retval = 1;
5042 			}
5043 		   }
5044                    else
5045                    {
5046 			search->sbp->kbp_gap_std[index] = BlastKarlinBlkCreate();
5047                         status = BlastKarlinBlkNuclGappedCalc(search->sbp->kbp_gap_std[index], options->gap_open, options->gap_extend, options->reward, options->penalty, search->sbp->kbp_std[index], &(search->sbp->round_down), &(search->error_return));
5048                         if (status != 0)
5049                               retval = 1;
5050                    }
5051                }
5052 	}
5053 
5054 	search->sbp->kbp_gap = search->sbp->kbp_gap_std;
5055         search->sbp->kbp = search->sbp->kbp_std;
5056 	if (search->pbp->gapped_calculation && StringCmp(prog_name, "blastn") != 0)
5057 	{
5058         Int4 array_size = BlastKarlinGetMatrixValues(search->sbp->name,
5059                                                      NULL, NULL, NULL, NULL,
5060                                                      NULL, NULL);
5061 		if ( !(array_size > 0)) {
5062            /* This can only happen in case of unsupported matrix! */
5063            sprintf(buffer,
5064                    "matrix %s is not supported\n",
5065                    search->sbp->name);
5066            BlastConstructErrorMessage("BLASTSetUpSearch", buffer, 2,
5067                                       &search->error_return);
5068            retval = 1;
5069         }
5070 		if (search->sbp->kbp_ideal == NULL)
5071         		search->sbp->kbp_ideal = BlastKarlinBlkStandardCalcEx(search->sbp);
5072 	}
5073 
5074 	/* Adjust the Karlin parameters. */
5075 	if (StringCmp(prog_name, "blastx") == 0  ||
5076             StringCmp(prog_name, "tblastx") == 0 ||
5077             (search->pbp->is_rps_blast && !StringCmp(prog_name, "tblastn")))
5078 	{
5079             /* Make sure ideal values are used for RPS tblastn, because the previously
5080                obtained values are for the fake protein. */
5081             if (search->pbp->is_rps_blast && !StringCmp(prog_name, "tblastn"))
5082                search->sbp->kbp[0]->Lambda = search->sbp->kbp_ideal->Lambda;
5083             BlastKarlinBlkStandardCalc(search->sbp, search->first_context, search->last_context);
5084 	}
5085 
5086 	/* If retval was set non-zero above (by the routines calculating Karlin-Altschul params),
5087 	   return here before these values are used.
5088 	*/
5089 	if (retval)
5090            goto BlastSetUpReturn;
5091 
5092 	if (options->gapped_calculation) {
5093 
5094         BLAST_KarlinBlkPtr kbp_gap =
5095           search->sbp->kbp_gap_std[search->first_context];
5096         Nlm_FloatHi alpha, beta; /*alpha and beta for the scoring system */
5097         if (StringCmp(options->program_name, "blastn") != 0)
5098             getAlphaBeta(options->matrix,&alpha,&beta,options->gapped_calculation,
5099                      options->gap_open, options->gap_extend);
5100         else
5101             BlastKarlinGetNuclAlphaBeta(options->reward, options->penalty, options->gap_open,
5102                      options->gap_extend, kbp_gap, options->gapped_calculation, &alpha, &beta);
5103 
5104         BlastComputeLengthAdjustment(kbp_gap->K,
5105                                      kbp_gap->logK,
5106                                      alpha/kbp_gap->Lambda, beta,
5107                                      length,
5108                                      search->dblen, search->dbseq_num,
5109                                      &length_adjustment );
5110 
5111         effective_query_length = length - length_adjustment;
5112 
5113         /* AM: If concatenating queries, then compute effective lengths of
5114            individual queries. */
5115         if( search->mult_queries )
5116 		{
5117             search->mult_queries->TotalLength = length;
5118             lengths_eff =
5119                 (IntArray) MemNew( sizeof( Int4 )*
5120                                    search->mult_queries->NumQueries );
5121             length_adj_tmp =
5122                 (IntArray)MemNew( sizeof( Int4 )*
5123                                   search->mult_queries->NumQueries );
5124 
5125             for( le_iter = 0;
5126                  le_iter < search->mult_queries->NumQueries;
5127                  ++le_iter ) {
5128                 length_tmp = search->mult_queries->QueryEnds[le_iter]
5129                     - search->mult_queries->QueryStarts[le_iter]
5130                     + 1;
5131                 length_adj_tmp[le_iter] = 0;
5132 
5133                 BlastComputeLengthAdjustment(kbp_gap->K,
5134                                              kbp_gap->logK,
5135                                              alpha/kbp_gap->Lambda,
5136                                              beta,
5137                                              length_tmp,
5138                                              search->dblen, search->dbseq_num,
5139                                              &length_adj_tmp[le_iter] );
5140 
5141                 lengths_eff[le_iter] = length_tmp - length_adj_tmp[le_iter];
5142 
5143                 search->mult_queries->EffLengths[le_iter] =
5144                     lengths_eff[le_iter];
5145                 search->mult_queries->Adjustments[le_iter] =
5146                     length_adj_tmp[le_iter];
5147 
5148 		    if( search->mult_queries->MinLen > length_tmp )
5149 		      search->mult_queries->MinLen = length_tmp;
5150 
5151             if( search->mult_queries->MinLenEff > lengths_eff[le_iter] )
5152                 search->mult_queries->MinLenEff = lengths_eff[le_iter];
5153 		  }
5154 		}
5155     }
5156 	else /* this is an ungapped alignment */
5157 	{
5158         BLAST_KarlinBlkPtr kbp = search->sbp->kbp[search->first_context];
5159 
5160         BlastComputeLengthAdjustment( kbp->K, kbp->logK, 1/kbp->H, 0.0,
5161                                  length,
5162                                  search->dblen, search->dbseq_num,
5163                                  &length_adjustment );
5164 
5165         effective_query_length = length - length_adjustment;
5166 
5167         /* AM: If concatenating queries, then compute effective lengths of
5168            individual queries. */
5169         if( search->mult_queries ) {
5170             search->mult_queries->TotalLength = length;
5171             lengths_eff =
5172                 (IntArray)MemNew( sizeof( Int4 )*
5173                                   search->mult_queries->NumQueries );
5174             length_adj_tmp =
5175                 (IntArray)MemNew( sizeof( Int4 )*
5176                                   search->mult_queries->NumQueries );
5177 
5178             for( le_iter = 0;
5179                  le_iter < search->mult_queries->NumQueries;
5180                  ++le_iter ) {
5181                 length_tmp = search->mult_queries->QueryEnds[le_iter]
5182                     - search->mult_queries->QueryStarts[le_iter]
5183                     + 1;
5184                 length_adj_tmp[le_iter] = 0;
5185 
5186                 BlastComputeLengthAdjustment( kbp->K, kbp->logK,
5187                                               1/kbp->H, 0.0,
5188                                               length_tmp,
5189                                               search->dblen, search->dbseq_num,
5190                                               &(length_adj_tmp[le_iter]) );
5191 
5192                 lengths_eff[le_iter] = length_tmp - length_adj_tmp[le_iter];
5193                 search->mult_queries->EffLengths[le_iter] =
5194                     lengths_eff[le_iter];
5195                 search->mult_queries->Adjustments[le_iter] =
5196                     length_adj_tmp[le_iter];
5197 
5198                 if( search->mult_queries->MinLen > length_tmp )
5199                     search->mult_queries->MinLen = length_tmp;
5200 
5201                 if( search->mult_queries->MinLenEff > lengths_eff[le_iter] )
5202                     search->mult_queries->MinLenEff = lengths_eff[le_iter];
5203             }
5204 		}
5205     }
5206 
5207 	search->length_adjustment = MAX(length_adjustment, 0);
5208 
5209     if (!search->dblen_eff) {
5210         search->dblen_eff =
5211             search->dblen - search->dbseq_num*search->length_adjustment;
5212         /* AM: If concatenating queries find effective db lengths for each query. */
5213 	   if( search->mult_queries )
5214 	   {
5215 	     for( le_iter = 0; le_iter < search->mult_queries->NumQueries;
5216 	          ++le_iter )
5217 	     {
5218 	       if( search->prog_number == blast_type_blastn )
5219 	         search->mult_queries->DbLenEff[le_iter]
5220 	           = MAX( 1, search->dblen
5221 		             - search->dbseq_num*length_adj_tmp[le_iter] );
5222                else
5223 	         search->mult_queries->DbLenEff[le_iter]
5224 		   = MAX( search->dbseq_num,
5225 		          search->dblen
5226 			  - search->dbseq_num*length_adj_tmp[le_iter] );
5227 	     }
5228 
5229              MemFree( length_adj_tmp );
5230 	   }
5231         }
5232 
5233 	for (index=search->first_context; index<=search->last_context; index++)
5234 	{
5235 		search->context[index].query->effective_length = effective_query_length;
5236 	}
5237 
5238         /* AM: Setting up effective search spaces for individual queries. */
5239 	if (search->searchsp_eff == 0)
5240 	{
5241 		search->searchsp_eff = ((Nlm_FloatHi) search->dblen_eff)*((Nlm_FloatHi) effective_query_length);
5242 
5243 		if( search->mult_queries )
5244 		  for( le_iter = 0; le_iter < search->mult_queries->NumQueries; ++le_iter )
5245 		  {
5246 		    search->mult_queries->SearchSpEff[le_iter]
5247 		      = ((Nlm_FloatHi)search->mult_queries->DbLenEff[le_iter])
5248 		      * ((Nlm_FloatHi)lengths_eff[le_iter]);
5249 
5250                     if( lengths_eff[le_iter] == search->mult_queries->MinLenEff )
5251 		      search->mult_queries->MinSearchSpEff
5252 		        = search->mult_queries->SearchSpEff[le_iter];
5253                   }
5254         }
5255 	else if( search->mult_queries )
5256 	  for( le_iter = 0; le_iter < search->mult_queries->NumQueries; ++le_iter )
5257 	    search->mult_queries->SearchSpEff[le_iter] = search->searchsp_eff;
5258 
5259 	/* The default is that cutoff_s was not set and is zero. */
5260 	if (options->cutoff_s == 0)
5261 	{
5262 		search->pbp->cutoff_e = options->expect_value;
5263 		search->pbp->cutoff_e_set = TRUE;
5264 		search->pbp->cutoff_s = options->cutoff_s;
5265 		search->pbp->cutoff_s_set = FALSE;
5266 	}
5267 	else
5268 	{
5269 		search->pbp->cutoff_e = options->expect_value;
5270 		search->pbp->cutoff_e_set = FALSE;
5271 		search->pbp->cutoff_s = options->cutoff_s;
5272 		search->pbp->cutoff_s_set = TRUE;
5273 	}
5274 
5275         MemFree( lengths_eff ); /* AM: query concatenation: free resources */
5276 
5277 /* For now e2 is set to 0.5 and cutoff_e2_set is FALSE.  This is then
5278 changed to the proper values in blast_set_parameters.  In the final version
5279 of this program (where more blast programs and command-line options are
5280 available) this needs to be set higher up. */
5281 	if (options->cutoff_s2 == 0)
5282 	{
5283 		search->pbp->cutoff_e2 = options->e2;
5284 		search->pbp->cutoff_e2_set = TRUE;
5285 		search->pbp->cutoff_s2 = options->cutoff_s2;
5286 		search->pbp->cutoff_s2_set = FALSE;
5287 	}
5288 	else
5289 	{
5290 		search->pbp->cutoff_e2 = options->e2;
5291 		search->pbp->cutoff_e2_set = FALSE;
5292 		search->pbp->cutoff_s2 = options->cutoff_s2;
5293 		search->pbp->cutoff_s2_set = TRUE;
5294 	}
5295 
5296 	search->pbp->discontinuous = options->discontinuous;
5297 
5298 
5299 	/* For postion based blast. */
5300 	search->pbp->ethresh = options->ethresh;
5301 	search->pbp->maxNumPasses = options->maxNumPasses;
5302 	search->pbp->pseudoCountConst = options->pseudoCountConst;
5303 
5304 	if (NlmThreadsAvailable()) /* ONly allow more than one cpu if MT compiled. */
5305 		search->pbp->process_num = options->number_of_cpus;
5306 	else
5307 		search->pbp->process_num = 1;
5308 
5309 	search->pbp->cpu_limit = options->cpu_limit;
5310 	search->pbp->gap_decay_rate = options->gap_decay_rate;
5311 	search->pbp->gap_size = options->gap_size;
5312 	search->pbp->gap_prob = options->gap_prob;
5313 	search->pbp->old_stats = options->old_stats;
5314 	search->pbp->use_large_gaps = options->use_large_gaps;
5315 	search->pbp->number_of_bits = options->number_of_bits;
5316 	search->pbp->two_pass_method = options->two_pass_method;
5317 	search->pbp->multiple_hits_only = options->multiple_hits_only;
5318 	search->pbp->gap_open = options->gap_open;
5319 	search->pbp->gap_extend = options->gap_extend;
5320         search->pbp->decline_align = options->decline_align;
5321         search->pbp->total_hsp_limit = options->total_hsp_limit;
5322 
5323 	search->pbp->hsp_num_max = options->hsp_num_max;
5324 /* CHANGE HERE??? */
5325 	if (search->pbp->gapped_calculation && StringCmp(search->prog_name, "blastn"))
5326 	{
5327 /*
5328 		search->pbp->cutoff_s2_set = TRUE;
5329 */
5330 		if (StringCmp(search->prog_name, "blastn") != 0)
5331 		{
5332 			search->pbp->gap_x_dropoff = (BLAST_Score) (options->gap_x_dropoff*NCBIMATH_LN2 / search->sbp->kbp_gap[search->first_context]->Lambda);
5333 			search->pbp->gap_x_dropoff_final = (BLAST_Score) (options->gap_x_dropoff_final*NCBIMATH_LN2 / search->sbp->kbp_gap[search->first_context]->Lambda);
5334 
5335                   /* AM: Change to support query multiplexing. */
5336 		  if( StringCmp( search->prog_name, "tblastn" ) == 0
5337 		      && search->mult_queries )
5338                   {
5339 		    search->pbp->gap_trigger
5340 		      = (BLAST_Score)( ( options->gap_trigger*NCBIMATH_LN2
5341 		                           + search->mult_queries->LogKMin )
5342                                        /search->mult_queries->LambdaMax );
5343 		  }
5344 		  else
5345 			search->pbp->gap_trigger = (BLAST_Score) ((options->gap_trigger*NCBIMATH_LN2+search->sbp->kbp[search->first_context]->logK)/ search->sbp->kbp[search->first_context]->Lambda);
5346 		}
5347 		else
5348 		{
5349 			search->pbp->gap_x_dropoff = (BLAST_Score) (options->gap_x_dropoff*NCBIMATH_LN2 / search->sbp->kbp[search->first_context]->Lambda);
5350 			search->pbp->gap_x_dropoff_final = (BLAST_Score) (options->gap_x_dropoff_final*NCBIMATH_LN2 / search->sbp->kbp[search->first_context]->Lambda);
5351 			search->pbp->gap_trigger = (BLAST_Score) ((options->gap_trigger*NCBIMATH_LN2+search->sbp->kbp[search->first_context]->logK)/ search->sbp->kbp[search->first_context]->Lambda);
5352 		}
5353 		/* The trigger value sets the s2 cutoff. */
5354 		search->pbp->cutoff_s2 = (Int4) search->pbp->gap_trigger;
5355 	}
5356 	else
5357 	{
5358 		search->pbp->gap_x_dropoff = (BLAST_Score) (options->gap_x_dropoff*NCBIMATH_LN2 / search->sbp->kbp[search->first_context]->Lambda);
5359 		search->pbp->gap_x_dropoff_final = (BLAST_Score) (options->gap_x_dropoff_final*NCBIMATH_LN2 / search->sbp->kbp[search->first_context]->Lambda);
5360 		search->pbp->gap_trigger = (BLAST_Score) ((options->gap_trigger*NCBIMATH_LN2+search->sbp->kbp[search->first_context]->logK)/ search->sbp->kbp[search->first_context]->Lambda);
5361 		/* Set S and S2 equal if not sum stats. */
5362 		if (search->pbp->do_sum_stats == FALSE)
5363 			search->pbp->cutoff_s2 = search->pbp->cutoff_s;
5364 	}
5365 	/* Ensures that gap_x_dropoff_final is at least as large as gap_x_dropoff. */
5366 	search->pbp->gap_x_dropoff_final = MAX(search->pbp->gap_x_dropoff_final, search->pbp->gap_x_dropoff);
5367 
5368 /* "threshold" (first and second) must be set manually for two-pass right now.*/
5369 	search->pbp->threshold_set = TRUE;
5370 	search->pbp->threshold_second = options->threshold_second;
5371 
5372 	search->pbp->window_size = options->window_size;
5373 	search->pbp->window_size_set = TRUE;
5374 
5375 	search->whole_query = TRUE;
5376 	if (options->required_start != 0 || options->required_end != -1)
5377 	{
5378 		search->whole_query = FALSE;
5379 		search->required_start = options->required_start;
5380 		if (options->required_end != -1)
5381 			search->required_end = options->required_end;
5382 		else
5383 			search->required_end = query_length;
5384 	}
5385 
5386 	if (qlen <= 0)
5387 		qlen = query_length;
5388 
5389 	/* Use DROPOFF_NUMBER_OF_BITS as the default if it's set to zero. */
5390 	if (options->dropoff_1st_pass == 0)
5391 		options->dropoff_1st_pass = DROPOFF_NUMBER_OF_BITS;
5392 
5393 	if (options->dropoff_2nd_pass == 0)
5394 		options->dropoff_2nd_pass = DROPOFF_NUMBER_OF_BITS;
5395 
5396 	if (StringCmp(search->prog_name, "blastn") != 0)
5397 	{
5398 		avglen = BLAST_AA_AVGLEN;
5399 	}
5400 	else
5401 	{
5402 		avglen = BLAST_NT_AVGLEN;
5403 		/* Use only one type of gap for blastn */
5404 		search->pbp->ignore_small_gaps = FALSE;
5405 	}
5406 
5407 	if (search->rdfp)
5408 	{
5409 		Int4 total_number;
5410 		Int8 total_length;
5411 
5412 		readdb_get_totals(search->rdfp, &total_length, &total_number);
5413 		if (total_number > 0)
5414 			avglen = ((Nlm_FloatHi) total_length)/total_number;
5415 	}
5416         else if (search->dblen > 0 && search->dbseq_num == 1)
5417         {
5418                 avglen = search->dblen;
5419         }
5420 
5421 	if (blast_set_parameters(search, options->dropoff_1st_pass, options->dropoff_2nd_pass, avglen, search->searchsp_eff, options->window_size) != 0) {
5422            retval = 1;
5423            goto BlastSetUpReturn;
5424         }
5425 	if (options->scalingFactor == 0.0)
5426 		options->scalingFactor = 1.0;
5427 
5428 	if (options->scalingFactor != 0.0 && options->scalingFactor != 1.0)
5429 	{
5430 		search->pbp->gap_open *= options->scalingFactor;
5431 		search->pbp->gap_extend *= options->scalingFactor;
5432 		search->pbp->dropoff_1st_pass *= options->scalingFactor;
5433 		search->pbp->dropoff_2nd_pass *= options->scalingFactor;
5434 		search->pbp->gap_x_dropoff *= options->scalingFactor;
5435 		search->pbp->gap_x_dropoff_final *= options->scalingFactor;
5436 		search->pbp->decline_align *= options->scalingFactor;
5437 		search->pbp->gap_trigger *= options->scalingFactor;
5438 		search->pbp->cutoff_s *= options->scalingFactor;
5439 		search->pbp->cutoff_s1 *= options->scalingFactor;
5440 		search->pbp->cutoff_s2 *= options->scalingFactor;
5441 		search->pbp->cutoff_s2_max *= options->scalingFactor;
5442 		search->pbp->cutoff_s_first *= options->scalingFactor;
5443 		search->pbp->cutoff_s_second *= options->scalingFactor;
5444 	}
5445 	search->pbp->scalingFactor = options->scalingFactor;
5446         if (options->is_megablast_search)
5447            search->pbp->mb_params = MegaBlastParameterBlkNew(options);
5448 	search->pbp->explode_seqids = options->explode_seqids;
5449 
5450 	if (search->pbp->multiple_hits_only)
5451 	{
5452 		if (search->context[search->first_context].query->length < 2*options->wordsize)
5453 		{
5454 			BlastConstructErrorMessage("Blast",
5455 				"Query must be at least twice wordsize for two hit mode", 2, &(search->error_return));
5456                         retval = 1;
5457 			goto BlastSetUpReturn;
5458 		}
5459 	}
5460 	else
5461 	{
5462 		if (search->context[search->first_context].query->length < options->wordsize)
5463 		{
5464                    Char tmp_buffer[128];
5465                    sprintf(tmp_buffer,
5466                            "Query length %d is less than wordsize %d",
5467                        search->context[search->first_context].query->length,
5468                            options->wordsize);
5469                    BlastConstructErrorMessage("Blast", buffer, 2,
5470                                               &(search->error_return));
5471                    BlastConstructErrorMessage("Blast",
5472                       tmp_buffer, 2, &(search->error_return));
5473                    retval = 1;
5474                    goto BlastSetUpReturn;
5475 		}
5476 	}
5477 
5478 	search->thr_info->awake_index = FALSE;
5479 	if (NlmThreadsAvailable() && (search->context_factor*query_length) > INDEX_THR_MIN_SIZE) {
5480             search->thr_info->awake_index = TRUE;
5481             search->thr_info->last_tick = Nlm_GetSecs();
5482             search->thr_info->index_thr =
5483                 NlmThreadCreate(index_proc, search->thr_info);
5484             search->thr_info->index_callback = callback;
5485 	}
5486 
5487 	/* Only do this if this is not a pattern search. */
5488 	if (options->isPatternSearch == FALSE && search->pbp->is_rps_blast == FALSE)
5489 	{
5490 	   if (StrCmp(search->prog_name, "blastn"))
5491 	      last_index = search->last_context;
5492 	   else
5493 	      last_index = search->first_context;
5494 	   for (index=search->first_context; index<=last_index; index++)
5495 	   {
5496 		if (options->threshold_second > 0)
5497 		{
5498 			search->wfp = search->wfp_first;
5499 			if (!(search->positionBased)) /*AAS*/
5500 			    status = BlastFindWords(search, 0, search->context[index].query->length, options->threshold_second, (Uint1) index);
5501 			else
5502 			    status = BlastNewFindWords(search, 0, search->context[index].query->length, options->threshold_second, (Uint1) index);
5503 			if (status < 0) {
5504                             search->thr_info->awake_index = FALSE;
5505                             ErrPostEx(SEV_WARNING, 0, 0,
5506                                "BlastFindWords returned non-zero status");
5507                             retval = 1;
5508                             goto BlastSetUpReturn;
5509 			}
5510 		}
5511 		search->wfp = search->wfp_second;
5512 		if (StringCmp(prog_name, "blastn") != 0)
5513 		{
5514 		    if (search->allocated & BLAST_SEARCH_ALLOC_WFP_SECOND)
5515 		    {
5516 			if (!(search->positionBased))
5517 			    status = BlastFindWords(search, 0, search->context[index].query->length, options->threshold_second, (Uint1) index);
5518 			else
5519 			    status = BlastNewFindWords(search, 0, search->context[index].query->length, options->threshold_second, (Uint1) index);
5520 		    }
5521 		}
5522 		else
5523 		{
5524 			status = BlastNtFindWords(search, 0, search->context[index].query->length,
5525 		 		                      (Uint1) index);
5526 		}
5527 
5528 		search->context[index].location = ValNodeFree(search->context[index].location);
5529 
5530 		if (status > 0)
5531 		{
5532 			search->thr_info->awake_index = FALSE;
5533 			sprintf(buffer, "No valid letters to be indexed on context %d", index);
5534                         /* This is just a warning */
5535 			BlastConstructErrorMessage("Blast", buffer, 1,
5536                                                    &(search->error_return));
5537 		}
5538 		else if (status < 0)
5539 		{
5540 			search->thr_info->awake_index = FALSE;
5541 			sprintf(buffer, "Error finding words");
5542 			BlastConstructErrorMessage("Blast", buffer, 2, &(search->error_return));
5543                         retval = 1;
5544 			goto BlastSetUpReturn;
5545 		}
5546 	   }
5547 	   if (StrCmp(search->prog_name, "blastn"))
5548 	      lookup_position_aux_destruct(search->wfp->lookup);
5549 	   else
5550 	      mb_lookup_position_aux_destruct(search->wfp->lookup);
5551 	}
5552 
5553 
5554 	/*
5555 	Turn off the index thread by setting this flag.  Don't wait for a join, as the
5556 	search will take much longer than the one second for this to die.
5557 	*/
5558 	search->thr_info->awake_index = FALSE;
5559  BlastSetUpReturn:
5560 	if (private_slp && private_slp_delete)
5561 		private_slp = SeqLocFree(private_slp);
5562 	if (private_slp_rev)
5563 		private_slp_rev = SeqLocFree(private_slp_rev);
5564 
5565 	return retval;
5566 }
5567 
5568 Boolean
BlastGetFirstAndLastContext(CharPtr prog_name,SeqLocPtr query_slp,Int2Ptr first_context,Int2Ptr last_context,Uint1 strand_options)5569 BlastGetFirstAndLastContext(CharPtr prog_name, SeqLocPtr query_slp, Int2Ptr first_context, Int2Ptr last_context, Uint1 strand_options)
5570 {
5571 	Uint1 strand;
5572 
5573 	if (query_slp == NULL)
5574 	{	/* Query was a BioseqPtr, Check strand_options. */
5575 		strand = Seq_strand_both;
5576 	}
5577 	else
5578 	{
5579 		strand = SeqLocStrand(query_slp);
5580 	}
5581 
5582 	/*
5583 	Check the strand_options and use that if top or bottom is specified.
5584 	otherwise use what's specified above.
5585 	*/
5586 	if (strand_options == BLAST_TOP_STRAND)
5587 		strand = Seq_strand_plus;
5588 	else if (strand_options == BLAST_BOTTOM_STRAND)
5589 		strand = Seq_strand_minus;
5590 
5591         if (StringCmp(prog_name, "blastp") == 0
5592             || StringCmp(prog_name, "tblastn") == 0
5593             ||  StringCmp(prog_name, "psitblastn") == 0)
5594 	{
5595 		*first_context = 0;
5596 		*last_context = 0;
5597 	}
5598 	else if (StringCmp(prog_name, "blastx") == 0 || StringCmp(prog_name, "tblastx") == 0)
5599 	{
5600 		if (strand == Seq_strand_unknown || strand == Seq_strand_plus || strand == Seq_strand_both)
5601 			*first_context = 0;
5602 		else
5603 			*first_context = 3;
5604 
5605 		if (strand == Seq_strand_minus || strand == Seq_strand_both)
5606 			*last_context = 5;
5607 		else
5608 			*last_context = 2;
5609 	}
5610 	else if (StringCmp(prog_name, "blastn") == 0)
5611 	{
5612 		if (strand == Seq_strand_unknown || strand == Seq_strand_plus || strand == Seq_strand_both)
5613 			*first_context = 0;
5614 		else
5615 			*first_context = 1;
5616 
5617 		if (strand == Seq_strand_minus || strand == Seq_strand_both)
5618 			*last_context = 1;
5619 		else
5620 			*last_context = 0;
5621 	}
5622 	return TRUE;
5623 }
5624 
5625 BlastDoubleInt4Ptr
GetGisFromFile(CharPtr gifile,Int4Ptr gi_list_size)5626 GetGisFromFile (CharPtr gifile, Int4Ptr gi_list_size)
5627 {
5628     BlastDoubleInt4Ptr retval = NULL;
5629     Int4ListPtr gilist = NULL;
5630     register Int4 i;
5631 
5632     if ( !(gilist = Int4ListReadFromFile(gifile)))
5633         return NULL;
5634 
5635     retval = (BlastDoubleInt4Ptr) MemNew(sizeof(BlastDoubleInt4)*gilist->count);
5636     if (!retval)
5637         return retval;
5638 
5639     if (gi_list_size)
5640         *gi_list_size = gilist->count;
5641 
5642     for (i = 0; i < gilist->count; i++)
5643         retval[i].gi = gilist->i[i];
5644 
5645     gilist = Int4ListFree(gilist);
5646 
5647     return retval;
5648 }
5649 
5650 BlastSearchBlkPtr LIBCALL
BLASTSetUpSearchWithReadDbInternalEx(SeqLocPtr query_slp,BioseqPtr query_bsp,CharPtr prog_name,Int4 qlen,CharPtr dbname,BLAST_OptionsBlkPtr options,int (LIBCALLBACK * callback)PROTO ((Int4 done,Int4 positives)),SeqIdPtr seqid_list,BlastDoubleInt4Ptr gi_list,Int4 gi_list_total,ReadDBFILEPtr rdfp)5651 BLASTSetUpSearchWithReadDbInternalEx (SeqLocPtr query_slp, BioseqPtr query_bsp,
5652 				      CharPtr prog_name, Int4 qlen, CharPtr
5653 				      dbname, BLAST_OptionsBlkPtr options, int
5654 				      (LIBCALLBACK *callback)PROTO((Int4 done,
5655 								    Int4
5656 								    positives)),
5657 				      SeqIdPtr seqid_list, BlastDoubleInt4Ptr
5658 				      gi_list, Int4 gi_list_total, ReadDBFILEPtr
5659 				      rdfp)
5660 {
5661    if (options->is_megablast_search)
5662       return MegaBlastSetUpSearchWithReadDbInternal(query_slp, query_bsp,
5663 						    prog_name, 0,
5664 						    dbname, options, callback,
5665 						    seqid_list, gi_list,
5666 						    gi_list_total, rdfp);
5667    else
5668       return BLASTSetUpSearchWithReadDbInternal(query_slp, query_bsp,
5669 						prog_name, qlen,
5670 						dbname, options, callback,
5671 						seqid_list, gi_list,
5672 						gi_list_total, rdfp);
5673 }
5674 
5675 
5676 BlastSearchBlkPtr
BLASTSetUpSearchWithReadDbInternal(SeqLocPtr query_slp,BioseqPtr query_bsp,CharPtr prog_name,Int4 qlen,CharPtr dbname,BLAST_OptionsBlkPtr options,int (LIBCALLBACK * callback)PROTO ((Int4 done,Int4 positives)),SeqIdPtr seqid_list,BlastDoubleInt4Ptr gi_list,Int4 gi_list_total,ReadDBFILEPtr rdfp)5677 BLASTSetUpSearchWithReadDbInternal (SeqLocPtr query_slp, BioseqPtr query_bsp, CharPtr prog_name, Int4 qlen, CharPtr dbname, BLAST_OptionsBlkPtr options, int (LIBCALLBACK *callback)PROTO((Int4 done, Int4 positives)), SeqIdPtr seqid_list, BlastDoubleInt4Ptr gi_list, Int4 gi_list_total, ReadDBFILEPtr rdfp)
5678 {
5679 	return BLASTSetUpSearchWithReadDbInternalMult(query_slp, query_bsp, prog_name, qlen, dbname, options, callback, seqid_list, gi_list, gi_list_total, rdfp, NULL);
5680 }
5681 
5682 
5683 /**
5684  * Calculate the hitlist size for preliminary alignments with a single
5685  * query, i.e. all but the final alignment with traceback.  This size
5686  * is generally somewhat larger than the final hitlist size because:
5687  * - the final alignment is the most sensitive, and may improve the
5688  *   score of alignments that would not otherwise be reported; and
5689  * - when composition-based statitics is used, many hits may be
5690  *   dropped in the final phase
5691  */
5692 Int4
BlastSingleQueryResultSize(BLAST_OptionsBlkPtr options)5693 BlastSingleQueryResultSize(BLAST_OptionsBlkPtr options)
5694 {
5695     Int4 result_size = /* size to be returned */
5696         options->hitlist_size;
5697 
5698     if (options->tweak_parameters) {
5699         /* Composition based statistics are being used. */
5700         result_size *= 2;
5701     }
5702     if ((options->is_megablast_search && options->no_traceback) ||
5703         (!options->is_megablast_search && options->gapped_calculation)) {
5704         /* This search uses preliminary alignments before the final
5705          * gapped calculation with traceback; increase the results
5706          * size. */
5707         result_size = MIN(2*result_size, result_size + 50);
5708     }
5709     return result_size;
5710 }
5711 
5712 
5713 BlastSearchBlkPtr
BLASTSetUpSearchWithReadDbInternalMult(SeqLocPtr query_slp,BioseqPtr query_bsp,CharPtr prog_name,Int4 qlen,CharPtr dbname,BLAST_OptionsBlkPtr options,int (LIBCALLBACK * callback)PROTO ((Int4 done,Int4 positives)),SeqIdPtr seqid_list,BlastDoubleInt4Ptr gi_list,Int4 gi_list_total,ReadDBFILEPtr rdfp,QueriesPtr mult_queries)5714 BLASTSetUpSearchWithReadDbInternalMult (SeqLocPtr query_slp, BioseqPtr query_bsp, CharPtr prog_name, Int4 qlen, CharPtr dbname, BLAST_OptionsBlkPtr options, int (LIBCALLBACK *callback)PROTO((Int4 done, Int4 positives)), SeqIdPtr seqid_list, BlastDoubleInt4Ptr gi_list, Int4 gi_list_total, ReadDBFILEPtr rdfp, QueriesPtr mult_queries)
5715 /* --KM added mult_queries param */
5716 
5717 {
5718 
5719     BlastSearchBlkPtr search = NULL;
5720     Boolean multiple_hits, options_alloc=FALSE;
5721     Int2 status, first_context = 0, last_context = 0;
5722     Int8	dblen = 0;
5723     Int4	query_length = 0;
5724     Nlm_FloatHi	searchsp_eff = 0;
5725     Int4        hitlist_size = 0;
5726     Int4 i = 0; /* AM: Query multiplexing. */
5727 
5728     /* Allocate default options if none are allocated yet. */
5729     if (options == NULL) {
5730         options = BLASTOptionNew(prog_name, FALSE);
5731         options_alloc = TRUE;
5732     }
5733 
5734     multiple_hits = options->multiple_hits_only;
5735     /*
5736     if (options->window_size != 0)
5737         multiple_hits = TRUE;
5738     else
5739         multiple_hits = FALSE;
5740     */
5741     BlastGetFirstAndLastContext(prog_name, query_slp, &first_context, &last_context, options->strand_option);
5742 
5743     if (query_slp)
5744         query_length = SeqLocLen(query_slp);
5745     else
5746         query_length = query_bsp->length;
5747 
5748     hitlist_size = BlastSingleQueryResultSize(options);
5749 
5750     /* AM: Query multiplexing */
5751     if( mult_queries )
5752     {
5753       for( i = 0; i < mult_queries->NumQueries; ++i )
5754         mult_queries->result_info[i].results
5755 	  = (BLASTResultHitlistPtr *)MemNew(
5756 	      (hitlist_size + 1)*sizeof( BLASTResultHitlistPtr ) );
5757 
5758       mult_queries->max_results_per_query = hitlist_size;
5759       hitlist_size *= mult_queries->NumQueries;
5760     }
5761 
5762     /* On the first call query length is used for the subject length. */
5763     search = BlastSearchBlkNewExtra(options->wordsize, query_length, dbname, multiple_hits, 0, options->threshold_second, hitlist_size, prog_name, NULL, first_context, last_context, rdfp, options->window_size);
5764 
5765     if (search) {
5766        readdb_get_totals_ex(search->rdfp, &(dblen), &(search->dbseq_num), TRUE);
5767 
5768        if (!options->ignore_gilist)
5769        {
5770            Boolean looking_for_gis = FALSE;
5771            /* Create virtual database if any of the databases have gi lists or
5772               ordinal id masks, or if gi list is provided from options */
5773            looking_for_gis = BlastProcessGiLists(search, options, gi_list, gi_list_total);
5774 
5775            /* search->thr_info->blast_gi_list will be non-NULL if gi_list or
5776             * options->gilist or options->gifile was non-NULL and therefore
5777             * intersected with any oidlists in the search->rdfp(s). If this is the
5778             * case, we need to recalculate the database length and number of
5779             * sequences */
5780            if (search->thr_info->blast_gi_list && !options->use_real_db_size)
5781                readdb_get_totals_ex3(search->rdfp, &dblen, &search->dbseq_num,
5782                                  FALSE, TRUE, eApproximate);
5783 
5784            if (looking_for_gis && search->thr_info->blast_gi_list == NULL)
5785            {
5786                ErrPostEx(SEV_WARNING, 0, 0, "Intersection of gilist and BLAST database ID's empty");
5787                search->query_invalid = TRUE;
5788            }
5789        }
5790 
5791         /* command-line/options trump alias file. */
5792         if (options->db_length > 0)
5793            dblen = options->db_length;
5794         if (options->dbseq_num > 0)
5795             search->dbseq_num = options->dbseq_num;
5796         if (options->searchsp_eff > 0)
5797             searchsp_eff = options->searchsp_eff;
5798 
5799         if (StringCmp(prog_name, "tblastn") == 0 ||
5800             StringCmp(prog_name, "tblastx") == 0 ||
5801             StringCmp(prog_name, "psitblastn") == 0) {
5802             dblen /= 3;
5803             searchsp_eff /= 3.0;
5804         }
5805         search->dblen = dblen;
5806         if (options->db_length > 0)
5807            search->dblen_eff = dblen;
5808         search->searchsp_eff = searchsp_eff;
5809 	/* AM: Moved next two lines here to be able to use mult_queries
5810 	       in BLASTSetUpSearchInternalByLoc() */
5811 	/* --KM put mult_queries, from Main, into the search structure */
5812 	search->mult_queries = mult_queries;
5813         status = BLASTSetUpSearchInternalByLoc (search, query_slp, query_bsp, prog_name, qlen, options, callback);
5814         if (status != 0) {
5815             ErrPostEx(SEV_WARNING, 0, 0, "SetUpBlastSearch failed.");
5816             search->query_invalid = TRUE;
5817         }
5818 
5819         if (search->pbp->mb_params)
5820             search = GreedyAlignMemAlloc(search);
5821         else
5822             search->abmp = NULL;
5823 
5824         if (search->rdfp->parameters & READDB_CONTENTS_ALLOCATED)
5825             search->rdfp = ReadDBCloseMHdrAndSeqFiles(search->rdfp);
5826     }
5827 
5828     if (options_alloc)
5829         options = BLASTOptionDelete(options);
5830 
5831     return search;
5832 }
5833 
5834 /*
5835 	Performs setup for a BLAST search.  This function must be used
5836 	with a search file accessed through readdb.
5837 */
5838 
5839 BlastSearchBlkPtr LIBCALL
BLASTSetUpSearchWithReadDb(BioseqPtr query_bsp,CharPtr prog_name,Int4 qlen,CharPtr dbname,BLAST_OptionsBlkPtr options,int (LIBCALLBACK * callback)PROTO ((Int4 done,Int4 positives)))5840 BLASTSetUpSearchWithReadDb(BioseqPtr query_bsp, CharPtr prog_name, Int4 qlen, CharPtr dbname, BLAST_OptionsBlkPtr options, int (LIBCALLBACK *callback)PROTO((Int4 done, Int4 positives)))
5841 
5842 {
5843 	return BLASTSetUpSearchWithReadDbInternal(NULL, query_bsp, prog_name, qlen, dbname, options, callback, NULL, NULL, 0, NULL);
5844 }
5845 
5846 BlastSearchBlkPtr LIBCALL
BLASTSetUpSearchWithReadDbEx(BioseqPtr query_bsp,CharPtr prog_name,Int4 qlen,CharPtr dbname,BLAST_OptionsBlkPtr options,int (LIBCALLBACK * callback)PROTO ((Int4 done,Int4 positives)),SeqIdPtr seqid_list,BlastDoubleInt4Ptr gi_list,Int4 gi_list_total)5847 BLASTSetUpSearchWithReadDbEx(BioseqPtr query_bsp, CharPtr prog_name, Int4 qlen, CharPtr dbname, BLAST_OptionsBlkPtr options, int (LIBCALLBACK *callback)PROTO((Int4 done, Int4 positives)), SeqIdPtr seqid_list, BlastDoubleInt4Ptr gi_list, Int4 gi_list_total)
5848 
5849 {
5850 	return BLASTSetUpSearchWithReadDbInternal (NULL, query_bsp, prog_name, qlen, dbname, options, callback, seqid_list, gi_list, gi_list_total, NULL);
5851 }
5852 
5853 /*
5854 	Performs setup for a BLAST search.  This function must be used
5855 	with a search file accessed through readdb.
5856 */
5857 
5858 BlastSearchBlkPtr LIBCALL
BLASTSetUpSearchByLocWithReadDb(SeqLocPtr query_slp,CharPtr prog_name,Int4 qlen,CharPtr dbname,BLAST_OptionsBlkPtr options,int (LIBCALLBACK * callback)PROTO ((Int4 done,Int4 positives)))5859 BLASTSetUpSearchByLocWithReadDb(SeqLocPtr query_slp, CharPtr prog_name, Int4 qlen, CharPtr dbname, BLAST_OptionsBlkPtr options, int (LIBCALLBACK *callback)PROTO((Int4 done, Int4 positives)))
5860 
5861 {
5862 	return BLASTSetUpSearchWithReadDbInternalMult (query_slp, NULL, prog_name, qlen, dbname, options, callback, NULL, NULL, 0, NULL, NULL);
5863 	/* --KM pass NULL mult_queries */
5864 }
5865 
5866 
5867 BlastSearchBlkPtr LIBCALL
BLASTSetUpSearchByLocWithReadDbEx(SeqLocPtr query_slp,CharPtr prog_name,Int4 qlen,CharPtr dbname,BLAST_OptionsBlkPtr options,int (LIBCALLBACK * callback)PROTO ((Int4 done,Int4 positives)),SeqIdPtr seqid_list,BlastDoubleInt4Ptr gi_list,Int4 gi_list_total,QueriesPtr mult_queries)5868 BLASTSetUpSearchByLocWithReadDbEx(SeqLocPtr query_slp, CharPtr prog_name, Int4 qlen, CharPtr dbname, BLAST_OptionsBlkPtr options, int (LIBCALLBACK *callback)PROTO((Int4 done, Int4 positives)), SeqIdPtr seqid_list, BlastDoubleInt4Ptr gi_list, Int4 gi_list_total, QueriesPtr mult_queries)
5869 /* --KM added mult_queries param */
5870 
5871 {
5872 	return BLASTSetUpSearchWithReadDbInternalMult (query_slp, NULL, prog_name, qlen, dbname, options, callback, seqid_list, gi_list, gi_list_total, NULL, mult_queries);
5873 	/* --KM pass mult_queries */
5874 }
5875 static BlastSearchBlkPtr
BLASTSetUpSearchEx(SeqLocPtr query_slp,BioseqPtr query_bsp,CharPtr prog_name,Int4 qlen,Int8 dblen,BlastAllWordPtr all_words,BLAST_OptionsBlkPtr options,int (LIBCALLBACK * callback)PROTO ((Int4 done,Int4 positives)))5876 BLASTSetUpSearchEx (SeqLocPtr query_slp, BioseqPtr query_bsp, CharPtr prog_name, Int4 qlen, Int8 dblen, BlastAllWordPtr all_words, BLAST_OptionsBlkPtr options, int (LIBCALLBACK *callback)PROTO((Int4 done, Int4 positives)))
5877 
5878 {
5879 	BlastSearchBlkPtr search;
5880 	Boolean options_alloc=FALSE, multiple_hits;
5881 	Int2 status, first_context, last_context;
5882 	Int4 actual_query_length=0;
5883 	Nlm_FloatHi searchsp_eff=0;
5884         Int4        hitlist_size;
5885 
5886 	/* Allocate default options if no are allocated yet. */
5887 	if (options == NULL)
5888 	{
5889 		options = BLASTOptionNew(prog_name, FALSE);
5890 		options_alloc = TRUE;
5891 	}
5892 
5893         multiple_hits = options->multiple_hits_only;
5894         /*
5895 	if (options->window_size != 0)
5896 		multiple_hits = TRUE;
5897 	else
5898 		multiple_hits = FALSE;
5899         */
5900 	if (query_slp == NULL && query_bsp == NULL)
5901 		return NULL;
5902 
5903 	if (query_slp)
5904 		actual_query_length = SeqLocLen(query_slp);
5905 	else if (query_bsp)
5906 		actual_query_length = query_bsp->length;
5907 
5908 	if (qlen <= 0)
5909 	{
5910 		qlen = actual_query_length;
5911 	}
5912 
5913 	/* If dblen is not set, use qlen. */
5914 	if (dblen <= 0)
5915 		dblen = qlen;
5916 
5917 	BlastGetFirstAndLastContext(prog_name, query_slp, &first_context, &last_context, options->strand_option);
5918 
5919         hitlist_size = BlastSingleQueryResultSize(options);
5920 
5921         /* On the first call query length is used for the subject length. */
5922 	search = BlastSearchBlkNew(options->wordsize, actual_query_length, NULL, multiple_hits, 0, options->threshold_second, hitlist_size, prog_name, all_words, first_context, last_context, options->window_size);
5923 
5924 	if (search)
5925 	{
5926                 search->subject->length = dblen;
5927 		/* Options setting overrides parameter. */
5928                 if (options->db_length > 0)
5929 			dblen = options->db_length;
5930 		if (options->searchsp_eff > 0)
5931 			searchsp_eff = options->searchsp_eff;
5932                 if (StringCmp(prog_name, "tblastn") == 0
5933                     || StringCmp(prog_name, "tblastx") == 0
5934                     || StringCmp(prog_name, "psitblastn") == 0)
5935                 {
5936                         dblen /= 3;
5937                         searchsp_eff /= 3.0;
5938                 }
5939 		if (options->dbseq_num > 0)
5940 			search->dbseq_num = options->dbseq_num;
5941 		else
5942 			search->dbseq_num = (Int4) dblen/qlen;
5943 
5944 		if (search->dbseq_num <=0)
5945 			search->dbseq_num = 1;
5946 
5947 		search->dblen = dblen;
5948 		/* If searchsp_eff is > 0 it will be used. */
5949 		search->searchsp_eff = searchsp_eff;
5950                 if (options->is_megablast_search)
5951                    search->pbp->mb_params = MegaBlastParameterBlkNew(options);
5952 		if (search->pbp->mb_params)
5953 		   status = MegaBlastSetUpSearchInternalByLoc (search, query_slp, query_bsp, prog_name, qlen, options, callback);
5954 		else
5955 		   status = BLASTSetUpSearchInternalByLoc(search, query_slp, query_bsp, prog_name, qlen, options, callback);
5956 		if (status != 0)
5957 		{
5958 	  		ErrPostEx(SEV_WARNING, 0, 0, "SetUpBlastSearch failed.");
5959 			search->query_invalid = TRUE;
5960 		}
5961 
5962 		if (search->pbp->mb_params)
5963 	   		search = GreedyAlignMemAlloc(search);
5964 	}
5965 
5966 	if (options_alloc)
5967 		options = BLASTOptionDelete(options);
5968 
5969 	return search;
5970 }
5971 
5972 /*
5973 	Performs necessary setup for a BLAST search.  The arguments are:
5974 
5975 	 - search: BlastSearchBlkPtr created by BlastSearchBlkNew
5976 	 - query_bsp: BioseqPtr for the query
5977 	 - matrix: CharPtr containing the name of the matrix
5978 	 - prog_name: CharPtr containing name of the program
	 - qlen: Int4 with length of the query, if a length should be
		specified (for statistical calculations); if this argument is
		zero, then query_bsp->length is used.
	 - dblen: Int8 with length of the database.
5983 	 - e_cutoff: BLAST_Score specifying the "expect" value.
5984 	 - number_of_processors: number of processors to use.
5985 	 - gap_decay_rate: between zero and one, related to prob. of # of HSP's.
5986 	 - gap_size: largest allowable gap if "small" gaps are used.
5987 	 - gap_prob: probability of "small" gap model being correct.
5988 	 - multiple_hits: if TRUE, multiple hits method is used.
5989 	 - window: window size for multiple hits method
5990 	 - threshold_second: initial hit threshold for 2nd pass
5991 	 - discontiguous: should discontiguous words be used?
5992 	 - old_stats: should the old statistics be used?
5993 	 - is_prot: is this a protein?
5994 
5995 
5996 */
5997 
5998 BlastSearchBlkPtr LIBCALL
BLASTSetUpSearch(BioseqPtr query_bsp,CharPtr prog_name,Int4 qlen,Int8 dblen,BlastAllWordPtr all_words,BLAST_OptionsBlkPtr options,int (LIBCALLBACK * callback)PROTO ((Int4 done,Int4 positives)))5999 BLASTSetUpSearch (BioseqPtr query_bsp, CharPtr prog_name, Int4 qlen, Int8 dblen, BlastAllWordPtr all_words, BLAST_OptionsBlkPtr options, int (LIBCALLBACK *callback)PROTO((Int4 done, Int4 positives)))
6000 
6001 {
6002 	return BLASTSetUpSearchEx (NULL, query_bsp, prog_name, qlen, dblen, all_words, options, callback);
6003 }
6004 
6005 BlastSearchBlkPtr LIBCALL
BLASTSetUpSearchByLoc(SeqLocPtr query_slp,CharPtr prog_name,Int4 qlen,Int8 dblen,BlastAllWordPtr all_words,BLAST_OptionsBlkPtr options,int (LIBCALLBACK * callback)PROTO ((Int4 done,Int4 positives)))6006 BLASTSetUpSearchByLoc (SeqLocPtr query_slp, CharPtr prog_name, Int4 qlen, Int8 dblen, BlastAllWordPtr all_words, BLAST_OptionsBlkPtr options, int (LIBCALLBACK *callback)PROTO((Int4 done, Int4 positives)))
6007 
6008 {
6009 	return BLASTSetUpSearchEx (query_slp, NULL, prog_name, qlen, dblen, all_words, options, callback);
6010 }
6011 
6012 static int LIBCALLBACK
diag_compare_hsps(VoidPtr v1,VoidPtr v2)6013 diag_compare_hsps(VoidPtr v1, VoidPtr v2)
6014 {
6015    BLAST_HSPPtr h1, h2;
6016 
6017    h1 = *((BLAST_HSPPtr PNTR) v1);
6018    h2 = *((BLAST_HSPPtr PNTR) v2);
6019 
6020    return (h1->query.offset - h1->subject.offset) -
6021       (h2->query.offset - h2->subject.offset);
6022 }
6023 
6024 /*
6025    Shifts all HSP coordinates according to where the partial sequence
6026    started in the large sequence
6027 */
AdjustOffsetsInBLASTHitList(BLAST_HitListPtr hitlist,Int4 start)6028 void AdjustOffsetsInBLASTHitList(BLAST_HitListPtr hitlist, Int4 start)
6029 {
6030    Int4 index;
6031    BLAST_HSPPtr hsp;
6032 
6033    for (index=0; index<hitlist->hspcnt; index++) {
6034       hsp = hitlist->hsp_array[index];
6035       hsp->subject.offset += start;
6036       hsp->subject.end += start;
6037       hsp->subject.gapped_start += start;
6038       if (hsp->gap_info)
6039          hsp->gap_info->start2 += start;
6040    }
6041 }
6042 
6043 #define DBSEQ_CHUNK_OVERLAP 100
6044 #define OVERLAP_DIAG_CLOSE 10
6045 /* This is a hard merge, i.e. the two HSPs will be merged only
6046    if they intersect
6047 */
6048 static Boolean
BLASTMergeHsps(BLAST_HSPPtr hsp1,BLAST_HSPPtr hsp2,Int4 start)6049 BLASTMergeHsps(BLAST_HSPPtr hsp1, BLAST_HSPPtr hsp2, Int4 start)
6050 {
6051    BLASTHSPSegmentPtr segments1, segments2, new_segment1, new_segment2;
6052    GapXEditScriptPtr esp1, esp2, esp;
6053    Int4 end = start + DBSEQ_CHUNK_OVERLAP - 1;
6054    Int4 min_diag, max_diag, num1, num2, dist, next_dist=0;
6055    Int4 diag1_start, diag1_end, diag2_start, diag2_end;
6056    Int4 index;
6057    Uint1 intersection_found;
6058    Uint1 op_type;
6059 
6060    if (!hsp1->gap_info || !hsp2->gap_info) {
6061       /* Assume that this is an ungapped alignment, hence simply compare
6062          diagonals. Do not merge if they are on different diagonals */
6063       if (diag_compare_hsps(&hsp1, &hsp2) == 0 &&
6064           hsp1->query.end >= hsp2->query.offset) {
6065          hsp1->query.end = hsp2->query.end;
6066          hsp1->subject.end = hsp2->subject.end;
6067          hsp1->query.length = hsp1->query.end - hsp1->query.offset;
6068          hsp1->subject.length = hsp1->subject.end - hsp1->subject.offset;
6069          return TRUE;
6070       } else
6071          return FALSE;
6072    }
6073    /* Find whether these HSPs have an intersection point */
6074    segments1 = (BLASTHSPSegmentPtr) MemNew(sizeof(BLASTHSPSegment));
6075 
6076    esp1 = hsp1->gap_info->esp;
6077    esp2 = hsp2->gap_info->esp;
6078 
6079    segments1->q_start = hsp1->query.offset;
6080    segments1->s_start = hsp1->subject.offset;
6081    while (segments1->s_start < start) {
6082       if (esp1->op_type == GAPALIGN_INS)
6083          segments1->q_start += esp1->num;
6084       else if (segments1->s_start + esp1->num < start) {
6085          if (esp1->op_type == GAPALIGN_SUB) {
6086             segments1->s_start += esp1->num;
6087             segments1->q_start += esp1->num;
6088          } else if (esp1->op_type == GAPALIGN_DEL)
6089             segments1->s_start += esp1->num;
6090       } else
6091          break;
6092       esp1 = esp1->next;
6093    }
6094    /* Current esp is the first segment within the overlap region */
6095    segments1->s_end = segments1->s_start + esp1->num - 1;
6096    if (esp1->op_type == GAPALIGN_SUB)
6097       segments1->q_end = segments1->q_start + esp1->num - 1;
6098    else
6099       segments1->q_end = segments1->q_start;
6100 
6101    new_segment1 = segments1;
6102 
6103    for (esp = esp1->next; esp; esp = esp->next) {
6104       new_segment1->next = (BLASTHSPSegmentPtr)
6105          MemNew(sizeof(BLASTHSPSegment));
6106       new_segment1->next->q_start = new_segment1->q_end + 1;
6107       new_segment1->next->s_start = new_segment1->s_end + 1;
6108       new_segment1 = new_segment1->next;
6109       if (esp->op_type == GAPALIGN_SUB) {
6110          new_segment1->q_end += esp->num - 1;
6111          new_segment1->s_end += esp->num - 1;
6112       } else if (esp->op_type == GAPALIGN_INS) {
6113          new_segment1->q_end += esp->num - 1;
6114          new_segment1->s_end = new_segment1->s_start;
6115       } else {
6116          new_segment1->s_end += esp->num - 1;
6117          new_segment1->q_end = new_segment1->q_start;
6118       }
6119    }
6120 
6121    /* Now create the second segments list */
6122 
6123    segments2 = (BLASTHSPSegmentPtr) MemNew(sizeof(BLASTHSPSegment));
6124    segments2->q_start = hsp2->query.offset;
6125    segments2->s_start = hsp2->subject.offset;
6126    segments2->q_end = segments2->q_start + esp2->num - 1;
6127    segments2->s_end = segments2->s_start + esp2->num - 1;
6128 
6129    new_segment2 = segments2;
6130 
6131    for (esp = esp2->next; esp && new_segment2->s_end < end;
6132         esp = esp->next) {
6133       new_segment2->next = (BLASTHSPSegmentPtr)
6134          MemNew(sizeof(BLASTHSPSegment));
6135       new_segment2->next->q_start = new_segment2->q_end + 1;
6136       new_segment2->next->s_start = new_segment2->s_end + 1;
6137       new_segment2 = new_segment2->next;
6138       if (esp->op_type == GAPALIGN_INS) {
6139          new_segment2->s_end = new_segment2->s_start;
6140          new_segment2->q_end = new_segment2->q_start + esp->num - 1;
6141       } else if (esp->op_type == GAPALIGN_DEL) {
6142          new_segment2->s_end = new_segment2->s_start + esp->num - 1;
6143          new_segment2->q_end = new_segment2->q_start;
6144       } else if (esp->op_type == GAPALIGN_SUB) {
6145          new_segment2->s_end = new_segment2->s_start + esp->num - 1;
6146          new_segment2->q_end = new_segment2->q_start + esp->num - 1;
6147       }
6148    }
6149 
6150    new_segment1 = segments1;
6151    new_segment2 = segments2;
6152    intersection_found = 0;
6153    num1 = num2 = 0;
6154    while (new_segment1 && new_segment2 && !intersection_found) {
6155       if (new_segment1->s_end < new_segment2->s_start ||
6156           new_segment1->q_end < new_segment2->q_start) {
6157          new_segment1 = new_segment1->next;
6158          num1++;
6159          continue;
6160       }
6161       if (new_segment2->s_end < new_segment1->s_start ||
6162           new_segment2->q_end < new_segment1->q_start) {
6163          new_segment2 = new_segment2->next;
6164          num2++;
6165          continue;
6166       }
6167       diag1_start = new_segment1->s_start - new_segment1->q_start;
6168       diag2_start = new_segment2->s_start - new_segment2->q_start;
6169       diag1_end = new_segment1->s_end - new_segment1->q_end;
6170       diag2_end = new_segment2->s_end - new_segment2->q_end;
6171 
6172       if (diag1_start == diag1_end && diag2_start == diag2_end &&
6173           diag1_start == diag2_start) {
6174          /* Both segments substitutions, on same diagonal */
6175          intersection_found = 1;
6176          dist = new_segment2->s_end - new_segment1->s_start + 1;
6177          break;
6178       } else if (diag1_start != diag1_end && diag2_start != diag2_end) {
6179          /* Both segments gaps - must intersect */
6180          intersection_found = 3;
6181 
6182          dist = new_segment2->s_end - new_segment1->s_start + 1;
6183          op_type = GAPALIGN_INS;
6184          next_dist = new_segment2->q_end - new_segment1->q_start - dist + 1;
6185          if (new_segment2->q_end - new_segment1->q_start < dist) {
6186             dist = new_segment2->q_end - new_segment1->q_start + 1;
6187             op_type = GAPALIGN_DEL;
6188             next_dist = new_segment2->s_end - new_segment1->s_start - dist + 1;
6189          }
6190          break;
6191       } else if (diag1_start != diag1_end) {
6192          max_diag = MAX(diag1_start, diag1_end);
6193          min_diag = MIN(diag1_start, diag1_end);
6194          if (diag2_start >= min_diag && diag2_start <= max_diag) {
6195             intersection_found = 2;
6196             dist = diag2_start - min_diag + 1;
6197             if (new_segment1->s_end == new_segment1->s_start)
6198                next_dist = new_segment2->s_end - new_segment1->s_end + 1;
6199             else
6200                next_dist = new_segment2->q_end - new_segment1->q_end + 1;
6201             break;
6202          }
6203       } else if (diag2_start != diag2_end) {
6204          max_diag = MAX(diag2_start, diag2_end);
6205          min_diag = MIN(diag2_start, diag2_end);
6206          if (diag1_start >= min_diag && diag1_start <= max_diag) {
6207             intersection_found = 2;
6208             next_dist = max_diag - diag1_start + 1;
6209             if (new_segment2->s_end == new_segment2->s_start)
6210                dist = new_segment2->s_start - new_segment1->s_start + 1;
6211             else
6212                dist = new_segment2->q_start - new_segment1->q_start + 1;
6213             break;
6214          }
6215       }
6216       if (new_segment1->s_end <= new_segment2->s_end) {
6217          new_segment1 = new_segment1->next;
6218          num1++;
6219       } else {
6220          new_segment2 = new_segment2->next;
6221          num2++;
6222       }
6223    }
6224 
6225    if (intersection_found) {
6226       esp = NULL;
6227       for (index = 0; index < num1-1; index++)
6228          esp1 = esp1->next;
6229       for (index = 0; index < num2-1; index++) {
6230          esp = esp2;
6231          esp2 = esp2->next;
6232       }
6233       if (intersection_found < 3) {
6234          if (num1 > 0)
6235             esp1 = esp1->next;
6236          if (num2 > 0) {
6237             esp = esp2;
6238             esp2 = esp2->next;
6239          }
6240       }
6241       switch (intersection_found) {
6242       case 1:
6243          esp1->num = dist;
6244          esp1->next = esp2->next;
6245          esp2->next = NULL;
6246          break;
6247       case 2:
6248          esp1->num = dist;
6249          esp2->num = next_dist;
6250          esp1->next = esp2;
6251          if (esp)
6252             esp->next = NULL;
6253          break;
6254       case 3:
6255          esp1->num += dist;
6256          esp2->op_type = op_type;
6257          esp2->num = next_dist;
6258          esp1->next = esp2;
6259          if (esp)
6260             esp->next = NULL;
6261          break;
6262       default: break;
6263       }
6264       hsp1->query.end = hsp2->query.end;
6265       hsp1->subject.end = hsp2->subject.end;
6266       hsp1->query.length = hsp1->query.end - hsp1->query.offset;
6267       hsp1->subject.length = hsp1->subject.end - hsp1->subject.offset;
6268    }
6269 
6270    return (Boolean) intersection_found;
6271 }
6272 
BLASTHspContained(BLAST_HSPPtr hsp1,BLAST_HSPPtr hsp2)6273 static Boolean BLASTHspContained(BLAST_HSPPtr hsp1, BLAST_HSPPtr hsp2)
6274 {
6275    Boolean hsp_start_is_contained=FALSE, hsp_end_is_contained=FALSE;
6276 
6277    if (hsp1->score > hsp2->score ||
6278        SIGN(hsp2->query.frame) != SIGN(hsp1->query.frame) ||
6279        SIGN(hsp2->subject.frame) != SIGN(hsp1->subject.frame))
6280       return FALSE;
6281 
6282    if (CONTAINED_IN_HSP(hsp2->query.offset, hsp2->query.end, hsp1->query.offset, hsp2->subject.offset, hsp2->subject.end, hsp1->subject.offset) == TRUE) {
6283       hsp_start_is_contained = TRUE;
6284    }
6285    if (CONTAINED_IN_HSP(hsp2->query.offset, hsp2->query.end, hsp1->query.end, hsp2->subject.offset, hsp2->subject.end, hsp1->subject.end) == TRUE) {
6286       hsp_end_is_contained = TRUE;
6287    }
6288 
6289    return (hsp_start_is_contained && hsp_end_is_contained);
6290 }
6291 
6292 /*
6293   Merges the hits from different chunks of the subject sequence that
6294   have been searched separately
6295 */
6296 static BLAST_HitListPtr
BLASTMergeHitLists(BlastSearchBlkPtr search,BLAST_HitListPtr hitlist1,BLAST_HitListPtr hitlist2,Int4 start,Boolean merge_hsps)6297 BLASTMergeHitLists(BlastSearchBlkPtr search, BLAST_HitListPtr hitlist1,
6298                    BLAST_HitListPtr hitlist2, Int4 start, Boolean merge_hsps)
6299 {
6300    BLAST_HSPPtr hsp, hsp_var, PNTR hspp1, PNTR hspp2;
6301    Int4 index, index1, index2;
6302    Int4 hspcnt1, hspcnt2, new_hspcnt = 0;
6303    BLAST_HSPPtr PNTR new_hsp_array;
6304 
6305    if (hitlist1 == NULL) {
6306       hitlist1 = (BLAST_HitListPtr)
6307          MemDup(hitlist2, sizeof(BLAST_HitList));
6308       hitlist1->hsp_array = (BLAST_HSPPtr PNTR)
6309          MemNew(hitlist2->hspmax*sizeof(BLAST_HSPPtr));
6310       MemCpy(hitlist1->hsp_array, hitlist2->hsp_array,
6311              hitlist2->hspcnt*sizeof(BLAST_HSPPtr));
6312       return hitlist1;
6313    } else {
6314       /* In case these have changed */
6315       hitlist1->exact_match_array = hitlist2->exact_match_array;
6316       hitlist1->exact_match_max = hitlist2->exact_match_max;
6317    }
6318 
6319    hspcnt1 = hspcnt2 = 0;
6320 
6321    /* Put all HSPs that intersect the overlap region at the front of the
6322       respective HSP arrays. */
6323    for (index = 0; index < hitlist1->hspcnt; index++) {
6324       hsp = hitlist1->hsp_array[index];
6325       if (hsp->subject.end > start) {
6326          /* At least part of this HSP lies in the overlap strip. */
6327          hsp_var = hitlist1->hsp_array[hspcnt1];
6328          hitlist1->hsp_array[hspcnt1] = hsp;
6329          hitlist1->hsp_array[index] = hsp_var;
6330          ++hspcnt1;
6331       }
6332    }
6333    for (index = 0; index < hitlist2->hspcnt; index++) {
6334       hsp = hitlist2->hsp_array[index];
6335       if (hsp->subject.offset < start + DBSEQ_CHUNK_OVERLAP) {
6336          /* At least part of this HSP lies in the overlap strip. */
6337          hsp_var = hitlist2->hsp_array[hspcnt2];
6338          hitlist2->hsp_array[hspcnt2] = hsp;
6339          hitlist2->hsp_array[index] = hsp_var;
6340          ++hspcnt2;
6341       }
6342    }
6343    hspp1 = hitlist1->hsp_array;
6344    hspp2 = hitlist2->hsp_array;
6345 
6346    HeapSort(hspp1, hspcnt1, sizeof(BLAST_HSPPtr), diag_compare_hsps);
6347    HeapSort(hspp2, hspcnt2, sizeof(BLAST_HSPPtr), diag_compare_hsps);
6348 
6349    for (index=0; index<hspcnt1; index++) {
6350       for (index1=0; index1<hspcnt2; index1++) {
6351          if (hspp2[index1] &&
6352              hspp2[index1]->query.frame == hspp1[index]->query.frame &&
6353              hspp2[index1]->subject.frame == hspp1[index]->subject.frame &&
6354              ABS(diag_compare_hsps(&hspp1[index], &hspp2[index1])) <
6355              OVERLAP_DIAG_CLOSE) {
6356             if (merge_hsps) {
6357                if (BLASTMergeHsps(hspp1[index], hspp2[index1], start)) {
6358                   /* Free the second HSP. */
6359                   hspp2[index1] = BLAST_HSPFree(hspp2[index1]);
6360                }
6361             } else { /* No gap information available */
6362                if (BLASTHspContained(hspp1[index], hspp2[index1])) {
6363                   /* Point the first HSP to the new HSP; */
6364                   hspp1[index] = BLAST_HSPFree(hspp1[index]);
6365                   hspp1[index] = hspp2[index1];
6366                   hspp2[index1] = NULL;
6367                   /* This HSP has been removed, so break out of the inner
6368                      loop */
6369                   break;
6370                } else if (BLASTHspContained(hspp2[index1], hspp1[index])) {
6371                   hspp2[index1] = BLAST_HSPFree(hspp2[index1]);
6372                }
6373             }
6374          } else {
6375             /* This and remaining HSPs are too far from the one being
6376                checked */
6377             break;
6378          }
6379       }
6380    }
6381 
6382    HspArrayPurge(hitlist2->hsp_array, hitlist2->hspcnt, FALSE);
6383 
6384    /* The new number of HSPs is now the sum of the remaining counts in the
6385       two lists, but if there is a restriction on the number of HSPs to keep,
6386       it might have to be reduced. */
6387    new_hspcnt = hitlist2->hspcnt + hitlist1->hspcnt;
6388    if (search->pbp->hsp_num_max)
6389       new_hspcnt = MIN(new_hspcnt, search->pbp->hsp_num_max);
6390 
6391    if (new_hspcnt >= hitlist1->hspmax-1 && hitlist1->do_not_reallocate == FALSE) {
6392       Int4 new_allocated = 2*new_hspcnt;
6393       if (search->pbp->hsp_num_max)
6394          new_allocated = MIN(new_allocated, search->pbp->hsp_num_max);
6395       new_hsp_array = (BLAST_HSPPtr PNTR)
6396          Realloc(hitlist1->hsp_array, new_allocated*sizeof(BLAST_HSPPtr));
6397       if (new_hsp_array == NULL) {
6398          ErrPostEx(SEV_WARNING, 0, 0, "UNABLE to reallocate in BlastSaveCurrentHsp for ordinal id %ld, continuing with fixed array of %ld HSP's", (long) search->subject_id, (long) hitlist1->hspmax);
6399          hitlist1->do_not_reallocate = TRUE;
6400       } else {
6401          hitlist1->hsp_array = new_hsp_array;
6402          hitlist1->hspmax = new_allocated;
6403       }
6404       new_hspcnt = MIN(new_hspcnt, hitlist1->hspmax);
6405    }
6406 
6407    if (new_hspcnt >= hitlist2->hspcnt + hitlist1->hspcnt) {
6408       /* All HSPs from both arrays are saved */
6409       for (index=hitlist1->hspcnt, index1=0;
6410            index1<hitlist2->hspcnt; index1++) {
6411          if (hitlist2->hsp_array[index1] != NULL)
6412             hitlist1->hsp_array[index++] = hitlist2->hsp_array[index1];
6413       }
6414    } else {
6415       /* Not all HSPs are be saved; sort both arrays by score and save only
6416          the new_hspcnt best ones.
6417          For the merged set of HSPs, allocate array the same size as in the
6418          old HSP list. */
6419       new_hsp_array = (BLAST_HSP**)
6420          malloc(hitlist1->hspmax*sizeof(BLAST_HSP*));
6421       HeapSort(hitlist1->hsp_array, hitlist1->hspcnt,
6422                sizeof(BLAST_HSP*), score_compare_hsps);
6423       HeapSort(hitlist2->hsp_array, hitlist2->hspcnt, sizeof(BLAST_HSP*),
6424                score_compare_hsps);
6425       index1 = index2 = 0;
6426       for (index = 0; index < new_hspcnt; ++index) {
6427          if (index1 < hitlist1->hspcnt &&
6428              (index2 >= hitlist2->hspcnt ||
6429              (hitlist1->hsp_array[index1]->score >=
6430              hitlist2->hsp_array[index2]->score))) {
6431             new_hsp_array[index] = hitlist1->hsp_array[index1];
6432             ++index1;
6433          } else {
6434             new_hsp_array[index] = hitlist2->hsp_array[index2];
6435             ++index2;
6436          }
6437       }
6438       /* Free the extra HSPs that could not be saved */
6439       for ( ; index1 < hitlist1->hspcnt; ++index1) {
6440          hitlist1->hsp_array[index1] =
6441             BLAST_HSPFree(hitlist1->hsp_array[index1]);
6442       }
6443       for ( ; index2 < hitlist2->hspcnt; ++index2) {
6444          hitlist2->hsp_array[index2] =
6445             BLAST_HSPFree(hitlist2->hsp_array[index2]);
6446       }
6447       /* Point hitlist1's HSP array to the new one */
6448       hitlist1->hsp_array = (BLAST_HSP**) MemFree(hitlist1->hsp_array);
6449       hitlist1->hsp_array = new_hsp_array;
6450    }
6451 
6452    hitlist1->hspcnt = index;
6453    /* Second HSP list now does not own any HSPs */
6454    hitlist2->hspcnt = 0;
6455 
6456    return hitlist1;
6457 }
6458 
6459 /* Remove HSPs that do not touch the overlap region and have initial evalue
6460    estimate more than 10 times higher than the cutoff.
6461 */
6462 static BlastSearchBlkPtr
BlastReapPartialHitlistByEvalue(BlastSearchBlkPtr search,Int4 start)6463 BlastReapPartialHitlistByEvalue(BlastSearchBlkPtr search, Int4 start)
6464 {
6465    BLAST_HSPPtr hsp;
6466    Int4 index, hspcnt;
6467    FloatHi searchsp_eff;
6468    BLAST_KarlinBlkPtr PNTR kbp;
6469    Int4 context;
6470    Uint4 query_num; /* AM: Support for query concatenation. */
6471 
6472    if (search->pbp->gapped_calculation)
6473       kbp = search->sbp->kbp_gap;
6474    else
6475       kbp = search->sbp->kbp;
6476 
6477    hspcnt = search->current_hitlist->hspcnt;
6478    for (index=0; index<hspcnt; index++) {
6479       hsp = search->current_hitlist->hsp_array[index];
6480 
6481       if (hsp->subject.offset > start + DBSEQ_CHUNK_OVERLAP) {
6482 	 if (search->pbp->mb_params)
6483             context = BinarySearchInt4(hsp->query.offset,
6484                                        search->query_context_offsets,
6485                                        (Int4) (search->last_context+1));
6486          else
6487             context = (Int4) hsp->context;
6488 
6489 	    /* AM: Changed to support query concatenation. */
6490 	    if( !search->mult_queries )
6491               searchsp_eff = (FloatHi) search->dblen_eff *
6492                              (FloatHi) search->context[context].query->effective_length;
6493             else
6494 	    {
6495 	      query_num = GetQueryNum( search->mult_queries,
6496 	                               hsp->query.offset,
6497 				       hsp->query.end,
6498 				       hsp->query.frame );
6499               searchsp_eff = search->mult_queries->SearchSpEff[query_num];
6500 	    }
6501 
6502          if (kbp[context]) {
6503             /* kbp[context] == NULL means that this alignment has been
6504                extended across the boundary between different query sequences.
6505                Leave it like this for now */
6506             hsp->evalue = BlastKarlinStoE_simple(hsp->score, kbp[context],
6507                                                  searchsp_eff);
6508 
6509             if (hsp->evalue > 10*search->pbp->cutoff_e) {
6510                hsp = BLAST_HSPFree(hsp);
6511                search->current_hitlist->hsp_array[index] = NULL;
6512             }
6513          }
6514       }
6515    }
6516    search->current_hitlist->hspcnt =
6517       HspArrayPurge(search->current_hitlist->hsp_array, hspcnt, FALSE);
6518    return search;
6519 }
6520 
6521 /*
6522 	Performs a BLAST search using a sequence from obtained from readdb.
6523 */
6524 Int2 LIBCALL
BLASTPerformSearchWithReadDb(BlastSearchBlkPtr search,Int4 sequence_number)6525 BLASTPerformSearchWithReadDb (BlastSearchBlkPtr search, Int4 sequence_number)
6526 
6527 {
6528 	Int4 subject_length;
6529 	Uint1Ptr subject_seq=NULL;
6530 
6531         /* This mutex should not be necessary - readdb seems to have
6532          * synchronization issues when dealing with multiple volumes
6533          * from multiple threads.  This mutex fixes the symptom. */
6534 
6535         static int init_mutex = 0;
6536         static TNlmMutex wrap_readdb_mutex = 0;
6537 
6538         if (! init_mutex) {
6539             init_mutex++;
6540             NlmMutexInit(& wrap_readdb_mutex);
6541         }
6542 
6543         NlmMutexLock(wrap_readdb_mutex);
6544 
6545 	subject_length = readdb_get_sequence(search->rdfp, sequence_number, &subject_seq);
6546 
6547         NlmMutexUnlock(wrap_readdb_mutex);
6548 
6549 	search->dblen_eff_real += MAX(subject_length-search->length_adjustment, 1);
6550 	search->subject_id = sequence_number;
6551 
6552         return BLASTPerformSearch(search, subject_length, subject_seq);
6553 }
6554 
6555 /*
6556 	Performs a BLAST search with a subject sequence that is passed in.
6557 	Used when an entire database is being scanned (by
6558 	BLASTPerformSearchWithReadDb) and when only two seqs are being
6559 	compared.
6560 */
6561 Int2 LIBCALL
BLASTPerformSearch(BlastSearchBlkPtr search,Int4 subject_length,Uint1Ptr subject_seq)6562 BLASTPerformSearch (BlastSearchBlkPtr search, Int4 subject_length, Uint1Ptr subject_seq)
6563 
6564 {
6565 	Int2 status;
6566 
6567 	if (search->pbp->two_pass_method)
6568 	{
6569 		status = BLASTPerform2PassSearch(search, subject_length, subject_seq);
6570 	}
6571 	else
6572 	{
6573 		status = BLASTPerformFinalSearch(search, subject_length, subject_seq);
6574 	}
6575 
6576 	return status;
6577 }
6578 
6579 /*
6580 
6581 	Performs a BLAST search using the two-pass method: the first pass
6582 	looks for multiple initial hits and then performs a second pass
6583 	(with single hits extended) wiht a lower T value.
6584 
6585 	 Arguments are:
6586 
6587 	 - search: BlastSearchBlkPtr returned by SetUpBlastSearch, call
6588 		SetUpBlastSearch before calling this function.
6589 	 - sequence_number: number assigned to sequence (by user).  The
6590 		"readdb" library uses this number to access the sequence.
6591 		This number should be zero if it's not important.
6592 	 - subject_length: the length of the database sequence (not the length
6593 		allocated in *subject_seq).
6594 	 - subject_seq: CharPtr pointing to the sequence.
6595 
6596 	NOTE: static variables in PerformBlastSearch for subject_seq and
6597 	allocated_length are not an option as they can't be deallocated
6598 	after the last call and they are NOT MP-safe.
6599 */
6600 
6601 Int2 LIBCALL
BLASTPerform2PassSearch(BlastSearchBlkPtr search,Int4 subject_length,Uint1Ptr subject_seq)6602 BLASTPerform2PassSearch (BlastSearchBlkPtr search, Int4 subject_length, Uint1Ptr subject_seq)
6603 
6604 {
6605 	Int2 outer_frame, outer_frame_max, status, outer_frame_min;
6606 	Int4 prot_length;
6607 	Uint1Ptr prot_seq;
6608 
6609 	search->current_hitlist_purge = TRUE; /* The default. */
6610 	outer_frame_max = 1;
6611 
6612         if (StringCmp(search->prog_name, "tblastn") == 0
6613             || StringCmp(search->prog_name, "tblastx") == 0
6614             || StringCmp(search->prog_name, "psitblastn") == 0)
6615 	{
6616 		outer_frame_min = -3;
6617 		outer_frame_max = 3;
6618 	}
6619 	else
6620 	{
6621 		outer_frame_min = 0;
6622 		outer_frame_max = 0;
6623 	}
6624 
6625 	for (outer_frame=outer_frame_min; outer_frame<=outer_frame_max; outer_frame++)
6626 	{
6627 		search->subject->frame = outer_frame;
6628                if (StringCmp("tblastn", search->prog_name) == 0
6629                     || StringCmp("tblastx", search->prog_name) == 0
6630                     || StringCmp("psitblastn", search->prog_name) == 0)
6631 		{
6632 			if (outer_frame == 0)
6633 				continue;
6634 			prot_seq = search->translation_buffer;
6635 			prot_length = BlastTranslateUnambiguousSequence(search, subject_length, prot_seq, subject_seq, outer_frame);
6636 
6637                         if(search->pbp->is_rps_blast) {
6638                             /* SEG Filtering of query DNA sequence */
6639 
6640                             rpsBlastFilterSequence(search, outer_frame,
6641                                                    prot_seq, prot_length,
6642                                                    subject_length);
6643                         }
6644 
6645 			BlastSequenceAddSequence(search->subject, NULL, prot_seq, prot_length, subject_length, 0);
6646 		}
6647 		else
6648 		{
6649 			BlastSequenceAddSequence(search->subject, NULL, subject_seq-1, subject_length, subject_length, 0);
6650 		}
6651 
6652 		search->prelim = TRUE;
6653 		search->wfp = search->wfp_first;
6654 
6655 /* First pass with multiple hits. */
6656 		status = BlastExtendWordSearch(search, TRUE);
6657 	/* status = 0 means NO significant matches found on first pass.*/
6658 		if (status > 0)
6659 		{	/* Match found on initial pass, DO second pass. */
6660 			status = BLASTPerformFinalSearch(search, subject_length, subject_seq);
6661 			break;
6662 		}
6663 		else
6664 		{ /* NULL out the sequence to prevent unintentional FREE's
6665 			(it's in "*subject_seq"), but delete the descriptor. */
6666 			search->subject->sequence = NULL;
6667 		}
6668 
6669 		if (status < 0)
6670 		{		/* Error */
6671 			ErrPostEx(SEV_FATAL, 1, 0, "BlastExtendWordSearch returned non-zero status");
6672 			return 1;
6673 		}
6674 	}
6675 
6676 /* NULL out the sequence, leave in the proper length which is still needed
6677 for the significance evaluation. */
6678 	search->subject->length = subject_length;
6679 	search->subject->sequence = NULL;
6680 	search->subject->sequence_start = NULL;
6681 
6682 	return 0;
6683 }
6684 
/*

	Performs the final (non-preliminary) BLAST search of one subject
	sequence; search->prelim is set to FALSE before the word finder is
	called.  It is called directly by BLASTPerformSearch, or by
	BLASTPerform2PassSearch after a successful first pass.

	 Arguments are:

	 - search: BlastSearchBlkPtr returned by SetUpBlastSearch, call
		SetUpBlastSearch before calling this function.
	 - subject_length: the length of the database sequence (not the length
		allocated in *subject_seq).
	 - subject_seq: pointer to the sequence.

	For tblastn, tblastx and psitblastn the subject is translated and
	searched in frames -3..3 (frame 0 is skipped); blastn uses frame 1
	only and all other programs frame 0 only.

	A subject longer than MAX_DBSEQ_LEN is searched in chunks of at most
	MAX_DBSEQ_LEN positions, consecutive chunks overlapping by
	DBSEQ_CHUNK_OVERLAP.  The per-chunk hit lists (and, for tblastn and
	psitblastn, the per-frame hit lists) are combined with
	BLASTMergeHitLists and the result replaces search->current_hitlist.

	Returns 0 on normal completion (including subject_length == 0),
	1 if BlastExtendWordSearch reports an error, or the negative status
	of BlastNTPreliminaryGappedScore / BlastNTGetGappedScore.
	NOTE(review): the early error returns do not release "hitlist" or
	reset search->subject->sequence - confirm callers cope with that.

	NOTE: static variables in PerformBlastSearch for subject_seq and
	allocated_length are not an option as they can't be deallocated
	after the last call and they are NOT MP-safe.
*/

Int2 LIBCALL
BLASTPerformFinalSearch (BlastSearchBlkPtr search, Int4 subject_length, Uint1Ptr subject_seq)

{
    /* "hitlist" accumulates the merged result over chunks/frames; it stays
       NULL when no merging was needed. */
    BLAST_HitListPtr current_hitlist, hitlist = NULL;
    Int2 inner_frame, inner_frame_max, inner_frame_min, status;
    /* "start" is the subject offset of the chunk currently searched. */
    Int4 real_length, length, start = 0, num_chunks, index;
    Uint1Ptr prot_seq;

    /* Purge the current hit list before searching this subject. */
    BlastHitListPurge(search->current_hitlist);
    if (subject_length == 0)
       /* Normal return */
	return 0;

    BlastSequenceAddSequence(search->subject, NULL, subject_seq-1, subject_length, subject_length, 0);
    search->current_hitlist_purge = TRUE; /* The default. */
    inner_frame_max = 1;
    /* Choose the range of subject frames to search for this program. */
    if (search->prog_number == blast_type_tblastn
        || search->prog_number == blast_type_tblastx
        || search->prog_number == blast_type_psitblastn) {
        inner_frame_min = -3;
        inner_frame_max = 3;
    } else if (search->prog_number == blast_type_blastn) {
        inner_frame_min = 1;
        inner_frame_max = 1;
    } else {
        inner_frame_min = 0;
        inner_frame_max = 0;
    }

    /* One search per subject frame. */
    for (inner_frame=inner_frame_min; inner_frame<=inner_frame_max; inner_frame++) {
        search->subject->frame = inner_frame;
        if (search->prog_number == blast_type_tblastn
            || search->prog_number == blast_type_tblastx
            || search->prog_number == blast_type_psitblastn) {
            if (inner_frame == inner_frame_min) /* Purge on 1st call. */
                search->current_hitlist_purge = TRUE;
            else
                search->current_hitlist_purge = FALSE;
            /* There is no frame 0 for a translated subject. */
            if (inner_frame == 0)
                continue;
	    /* Each frame is chunked from the beginning again. */
	    start = 0;
            prot_seq = search->translation_buffer;
            real_length = BlastTranslateUnambiguousSequence(search, subject_length, prot_seq, subject_seq, inner_frame);

            if(search->pbp->is_rps_blast) {
                /* SEG Filtering of query DNA sequence */

                rpsBlastFilterSequence(search, inner_frame,
                                       prot_seq, real_length,
                                       subject_length);
            }

            /* subject seq stays the same, except for tblast[nx]. */
            BlastSequenceAddSequence(search->subject, NULL, prot_seq, real_length, subject_length, 0);
            /* Nothing to search in an empty translation. */
            if (real_length == 0)
                continue;
        } else
           real_length = subject_length;

        search->prelim = FALSE;
        /* Calculate some cutoff scores, these depend upon the seq lengths.*/
        /* For blastn  and gapped calc. use the cutoff's originally found. */
        if (!search->pbp->gapped_calculation &&
            search->prog_number != blast_type_blastn) {
            CalculateSecondCutoffScore(search, search->subject->length, &search->pbp->ignore_small_gaps, &search->pbp->cutoff_s_second, &search->pbp->cutoff_big_gap);
        }

#ifdef BLAST_COLLECT_STATS
        search->second_pass_trys++;
#endif

        if (search->pbp->mb_params)
           /* sequence_start is reserved for ncbi4na encoded sequence
              in this case */
           search->subject->sequence_start = NULL;

        length = real_length;
        /* Split subject sequence into chunks if it is too long.  Every chunk
           after the first advances the start by (chunk length minus
           DBSEQ_CHUNK_OVERLAP), hence the divisor below. */
        num_chunks = (length - DBSEQ_CHUNK_OVERLAP) /
           (MAX_DBSEQ_LEN - DBSEQ_CHUNK_OVERLAP) + 1;
        search->subject->original_length = 0;
        if (search->pbp->mb_params && !search->rdfp) {
           /* Coming from the 2 sequences engine: save the entire
              ncbi4na sequence in search->subject->sequence_start
           */
           Uint1Ptr seq_blastna, seq_2na;
           Uint1 rem;

           /* One byte per base plus one leading sentinel byte. */
           search->subject->sequence_start =
              (Uint1Ptr) MemNew(subject_length + 1);
           seq_blastna = search->subject->sequence_start;
           seq_2na = search->subject->sequence;
           rem = 3;
           *seq_blastna = (Uint1) ncbi4na_to_blastna[NULLB];
           seq_blastna++;
           /* Unpack one base per iteration; "rem" cycles 3,2,1,0 within a
              packed byte and then the input moves on to the next byte. */
           for (index=0; index<subject_length; index++) {
              *seq_blastna =
                 (Uint1) ncbi4na_to_blastna[(1 << READDB_UNPACK_BASE_N(*seq_2na, rem))];
              seq_blastna++;
              if (rem>0) rem--;
              else {
                 rem = 3;
                    seq_2na++;
              }
           }
        }

        for (index=0; index<num_chunks; index++) {
           /* The last chunk may be shorter than MAX_DBSEQ_LEN. */
           length = MIN(real_length-start, MAX_DBSEQ_LEN);
           search->subject->length = length;
           /* THE BLAST SEARCH _IS_ HERE! */
           if (BlastExtendWordSearch(search, search->pbp->multiple_hits_only) < 0) {
              /* Error occurred in BlastExtendWordSearch */
              return 1;
           }
           /* HSP's were not saved in any special order, sort. */
           current_hitlist = search->current_hitlist;
           if (current_hitlist && current_hitlist->do_not_reallocate == FALSE)
              HeapSort(current_hitlist->hsp_array, current_hitlist->hspcnt,sizeof(BLAST_HSPPtr), score_compare_hsps);
           if (search->pbp->gapped_calculation &&
               search->prog_number != blast_type_blastn) {
              /* NOTE(review): the first status is overwritten by the second
                 call and neither value is checked. */
              status = BlastPreliminaryGappedScore(search, search->subject->sequence, search->subject->length, inner_frame);
              status = BlastGetGappedScore(search, search->subject->length, search->subject->sequence, inner_frame);
           }
#if 1
           else if (!search->pbp->do_sum_stats && !search->pbp->mb_params) {
              status = BlastNTPreliminaryGappedScore(search, search->subject->sequence, search->subject->length);
              if (status < 0)
                 return status;
              status = BlastNTGetGappedScore(search, search->subject->length, search->subject->sequence);
              if (status < 0)
                 return status;
           }
#endif
           if (num_chunks > 1) {
              /* Called with the chunk's start offset so that the HSP
                 coordinates refer to the whole subject. */
              AdjustOffsetsInBLASTHitList(search->current_hitlist, start);

              if (search->current_hitlist->hspcnt > 0) {
                 search = BlastReapPartialHitlistByEvalue(search, start);
                 /* HSPs are merged (not just tested for containment) only
                    when mb_params is set. */
                 hitlist = BLASTMergeHitLists(search, hitlist,
                                              search->current_hitlist, start,
                                              (search->pbp->mb_params != NULL));
              }
              /* Advance to the next chunk, keeping the overlap. */
              start += length - DBSEQ_CHUNK_OVERLAP;
              search->subject->original_length = start;
              /* For blastn the advance in positions is divided by
                 READDB_COMPRESSION_RATIO to get the pointer increment. */
              if (search->prog_number == blast_type_blastn)
                 search->subject->sequence +=
                    (length - DBSEQ_CHUNK_OVERLAP)/READDB_COMPRESSION_RATIO;
              else
                 search->subject->sequence += length - DBSEQ_CHUNK_OVERLAP;
              /* The HSPs now belong to "hitlist"; empty the current list. */
              search->current_hitlist->hspcnt =
                 search->current_hitlist->hspcnt_max = 0;
           }
           else if (search->prog_number == blast_type_tblastn ||
                    search->prog_number == blast_type_psitblastn)
           {
                 /* Single chunk: collect the HSPs of this frame in "hitlist"
                    and clear the current list for the next frame. */
                 hitlist = BLASTMergeHitLists(search, hitlist, search->current_hitlist, 0, FALSE);
                 MemSet((VoidPtr) search->current_hitlist->hsp_array, 0,
                           sizeof(BLAST_HSPPtr)*(search->current_hitlist->hspcnt_max));
                 search->current_hitlist->hspcnt = search->current_hitlist->hspcnt_max = 0;
           }
        }
    } /* for (inner_frame=inner_frame_min; inner_frame */

    if (hitlist) {
       /* Replace the current hit list's contents with the merged list:
          release the old pointer array, copy the merged header over it
          (including its hsp_array pointer) and free the merged header. */
       MemFree(search->current_hitlist->hsp_array);
       MemCpy(search->current_hitlist, hitlist, sizeof(BLAST_HitList));
       MemFree(hitlist);
       /* NOTE(review): this assignment is overwritten with NULL a few
          lines below. */
       if (!search->pbp->mb_params)
          search->subject->sequence = search->subject->sequence_start + 1;
    }

    /* NULL out the sequence, leave in the proper length which is still
       needed for the significance evaluation. */
    search->subject->length = subject_length;
    search->subject->sequence = NULL;
    if (!search->pbp->mb_params)
       /* This holds the ncbi4na-encoded sequence for Mega BLAST */
       search->subject->sequence_start = NULL;

    return 0;
}
6891 
6892 
6893 
6894 /*
6895 	Gets the translation array for a give genetic code.
6896 	This array is optimized for the NCBI2na alphabet.
6897 	The reverse complement can also be spcified.
6898 
6899 	Int4 id: The number of the NCBI genetic code,
6900 	CharPtr name: The name of the NCBI genetic code,
6901 		(only one of id or name must be specified).
6902 	Boolean reverse_complement: translations for reverse
6903 		complement are needed.
6904 */
6905 
6906 Uint1Ptr
GetPrivatTranslationTable(CharPtr genetic_code,Boolean reverse_complement)6907 GetPrivatTranslationTable(CharPtr genetic_code, Boolean reverse_complement)
6908 
6909 {
6910 	Int2 index1, index2, index3, bp1, bp2, bp3;
6911 	Int2 codon;
6912   	SeqMapTablePtr smtp;
6913 	Uint1Ptr translation;
6914 /* The next array translate between the ncbi2na rep's and
6915 the rep's used by the genetic_code tables.  The rep used by the
6916 genetic code arrays is in mapping: T=0, C=1, A=2, G=3 */
6917   	static Uint1 mapping[4] = {2, /* A in ncbi2na */
6918        	               1, /* C in ncbi2na. */
6919        	               3, /* G in ncbi2na. */
6920        	               0 /* T in ncbi2na. */ };
6921 
6922 
6923 	if (genetic_code == NULL)
6924 		return NULL;
6925 
6926 	translation = MemNew(64*sizeof(Uint1));
6927 	if (translation == NULL)
6928 		return NULL;
6929 
6930 	smtp = SeqMapTableFind(Seq_code_ncbistdaa, Seq_code_ncbieaa);
6931 
6932 	for (index1=0; index1<4; index1++)
6933 	{
6934 		for (index2=0; index2<4; index2++)
6935 		{
6936 			for (index3=0; index3<4; index3++)
6937 			{
6938 /*
6939 The reverse complement codon is saved in it's orginal (non-complement)
6940 form AND with the high-order bits reversed from the non-complement form,
6941 as this is how they appear in the sequence.
6942 */
6943 			   if (reverse_complement)
6944 			   {
6945 				bp1 = 3 - index1;
6946 				bp2 = 3 - index2;
6947 				bp3 = 3 - index3;
6948 			   	codon = (mapping[bp1]<<4) + (mapping[bp2]<<2) + (mapping[bp3]);
6949 			   	translation[(index3<<4) + (index2<<2) + index1] = SeqMapTableConvert(smtp, genetic_code[codon]);
6950 			   }
6951 			   else
6952 			   {
6953 			   	codon = (mapping[index1]<<4) + (mapping[index2]<<2) + (mapping[index3]);
6954 			   	translation[(index1<<4) + (index2<<2) + index3] = SeqMapTableConvert(smtp, genetic_code[codon]);
6955 			   }
6956 
6957 			}
6958 		}
6959 	}
6960 	return translation;
6961 }	/* GetPrivatTranslationTable */
6962 
6963 /* Attach the "sequence" pointer to the BlastSequenceBlkPtr. sequence_start may be the
6964 actual start of the sequence (this pointer is kept for deallocation purposes).  The
6965 sequence may start before "sequence" starts as there may be a sentinel (i.e., NULLB)
6966 before the start of the sequence.  When the extension function extends this way it
6967 can tell that there is a NULLB there and stop the extension.
6968 
6969 */
6970 
6971 Int2 LIBCALL
BlastSequenceAddSequence(BlastSequenceBlkPtr sequence_blk,Uint1Ptr sequence,Uint1Ptr sequence_start,Int4 length,Int4 original_length,Int4 effective_length)6972 BlastSequenceAddSequence (BlastSequenceBlkPtr sequence_blk, Uint1Ptr sequence, Uint1Ptr sequence_start, Int4 length, Int4 original_length, Int4 effective_length)
6973 
6974 {
6975 	if (sequence_blk == NULL)
6976 		return 1;
6977 
6978 	if (sequence == NULL && sequence_start != NULL)
6979 	{
6980 		sequence_blk->sequence = sequence_start+1;
6981 	}
6982 	else if (sequence != NULL)
6983 	{
6984 		sequence_blk->sequence = sequence;
6985 	}
6986 	sequence_blk->sequence_start = sequence_start;
6987 	sequence_blk->length = length;
6988 	sequence_blk->original_length = original_length;
6989 	sequence_blk->effective_length = effective_length;
6990 
6991 	return 0;
6992 }
6993 
6994 /*
6995 	Select the appropriate wordfinder and then perform the search.
6996 	The "wordfinder's" called here look through the already found
6997 	words and extend those above a set limit ("T").
6998 
6999 	These wordfinders operate in two modes.  One is the "preliminary"
7000 	mode (search->prelim is TRUE); the wordfinders attempt to extend
7001 	an initial hit.  If they succeed at all, they return a positive
7002 	return status.  On the second pass (search->prelim is FALSE)
7003 	only those db seqs with hits are further investigated.
7004 
7005 */
7006 static Int4
BlastExtendWordSearch(BlastSearchBlkPtr search,Boolean multiple_hits)7007 BlastExtendWordSearch(BlastSearchBlkPtr search, Boolean multiple_hits)
7008 {
7009 	Int4 status=0;
7010 
7011 
7012 	/* multiple hits structure needed to perform mh extensions. */
7013 	if (multiple_hits == TRUE && search->ewp_params->multiple_hits == FALSE)
7014 		return -1;
7015 
7016 	if (multiple_hits == TRUE)
7017 		status = BlastWordFinder_mh(search);
7018 	else
7019 		status = BlastWordFinder(search);
7020 
7021 	return status;
7022 }
7023 
7024 /*----------   search a sequence with 1 Context, 1 Letter per byte  ---------*/
7025 static Int4
BlastWordFinder(BlastSearchBlkPtr search)7026 BlastWordFinder(BlastSearchBlkPtr search)
7027 {
7028 	BLAST_WordFinderPtr	wfp;
7029 	LookupTablePtr		lookup;
7030 	BLAST_ParameterBlkPtr	pbp;
7031 
7032 	pbp = search->pbp;
7033 	if (search->prelim == TRUE)
7034 	{
7035 		wfp=search->wfp_first;
7036 		if (pbp->cutoff_s2_set == TRUE)
7037 			pbp->cutoff_s2 = pbp->cutoff_s2_max;
7038 		else
7039 			pbp->cutoff_s2 = MIN(pbp->cutoff_s_first, pbp->cutoff_s2_max);
7040 		pbp->X = pbp->dropoff_1st_pass;
7041 	}
7042 	else
7043 	{
7044 		wfp=search->wfp_second;
7045 		if (!search->pbp->mb_params) {
7046 		   if (pbp->cutoff_s2_set == TRUE)
7047 		      pbp->cutoff_s2 = pbp->cutoff_s2_max;
7048 		   else
7049 		      pbp->cutoff_s2 = MIN(pbp->cutoff_s_second,
7050 					   pbp->cutoff_s2_max);
7051 		}
7052 		pbp->X = pbp->dropoff_2nd_pass;
7053 	}
7054 
7055 	lookup = wfp->lookup;
7056 
7057 	if (search->prog_number == blast_type_blastn)
7058 	{
7059 	   if (search->pbp->mb_params)
7060 	      return MegaBlastWordFinder(search, lookup);
7061 	   else
7062 	      return BlastNtWordFinder(search, lookup);
7063 	}
7064 	else
7065 	{
7066 		return BlastWordFinder_contig(search, lookup);
7067 	}
7068 }
7069 
7070 /* This function was updated to use mod_lt instead of the original lookup table,
7071  * but was not heavily optimized or tested.
7072  * (Modifications listed in comments before BlastWordFinder_mh_contig.)
7073  * -cfj
7074  */
7075 
7076 /*
7077 	Search a sequence with contiguous words.
7078 */
7079 static Int4
BlastWordFinder_contig(BlastSearchBlkPtr search,LookupTablePtr lookup)7080 BlastWordFinder_contig(BlastSearchBlkPtr search, LookupTablePtr lookup)
7081 {
7082 	register Uint1Ptr	s, s_end;
7083 	register Int4	char_size, lookup_index, mask;
7084 	register BLAST_Diag	diag, diag_tmp, real_diag;
7085 	BLAST_ExtendWordPtr     ewp;
7086 	BLAST_ExtendWordParamsPtr	ewp_params;
7087 	Boolean			prelim, succeed_to_right;
7088 	Uint1Ptr			subject0;
7089 	CfjModStruct *combo_array;
7090 	Int4	 index=0;
7091 	register ModLookupPosition hit_info;
7092 	Int2		context;
7093 	Int4		q_off, s_off, offset, word_width;
7094 	register Int4 bits_to_shift, min_diag_length, min_diag_mask;
7095 	Int8	number_of_hits=0;
7096 	register Int4 num_hits;
7097 	register ModLookupPositionPtr lookup_pos;
7098 	ModLAEntry *mod_lt=lookup->mod_lt;
7099 	ewp_params=search->ewp_params;
7100 	prelim = search->prelim;
7101 
7102 /* this function only does final run, prelim is done by BlastWordFinder_mh_contig */
7103 	if (prelim)
7104 		return 1;
7105 
7106 	char_size = lookup->char_size;
7107 	mask = lookup->mask;
7108 	offset = ewp_params->offset;
7109 	subject0 = s = search->subject->sequence;
7110 	min_diag_length = ewp_params->min_diag_length;
7111 	bits_to_shift = ewp_params->bits_to_shift;
7112 	min_diag_mask = ewp_params->min_diag_mask;
7113 
7114 /* The word_width tells how "long" a word is; if it's contiguous then it's
7115 the size of the word. */
7116 	word_width = lookup->wordsize;
7117 
7118 
7119 	if (search->current_hitlist == NULL)
7120 	{
7121 		search->current_hitlist = BlastHitListNew(search);
7122 	}
7123 	else
7124 	{ /* Scrub the hitlist. */
7125 		if (search->current_hitlist_purge)
7126 			BlastHitListPurge(search->current_hitlist);
7127 	}
7128 
7129 	/* subject is too short to find anything! */
7130 	if (word_width > search->subject->length)
7131 		return 0;
7132 
7133 	s = lookup_find_init(lookup, &index, s);
7134 	lookup_index = index;
7135 
7136         /* Determines when to stop scanning the database. */
7137         s_end = subject0 + search->subject->length;
7138 	if ((search->last_context-search->first_context+1) > 1)
7139 	{
7140 	    for (;;)
7141 	    {
7142 		do {
7143 			/* lookup a contiguous word. */
7144 			s++;
7145         		lookup_index = (((lookup_index) & mask)<<char_size) + *s;
7146 			if (s == s_end)
7147 				goto NormalReturn;
7148 		} while (mod_lt[lookup_index].num_used == 0);
7149 
7150 		num_hits = mod_lt[lookup_index].num_used;
7151 		lookup_pos = mod_lt[lookup_index].entries;
7152 		hit_info = *((Uint4 *) lookup_pos);
7153 		lookup_pos++;
7154 
7155                 if(num_hits > 3){
7156                     if(search->pbp->is_rps_blast) {
7157                         lookup_pos = (ModLookupPositionPtr) ((Uint1Ptr) lookup->mod_lookup_table_memory + (Uint4) *lookup_pos);
7158                     } else {
7159                         lookup_pos=*((ModLookupPositionPtr PNTR) lookup_pos);
7160                     }
7161                 }
7162 
7163  		s_off = s-subject0;
7164 		diag_tmp = s_off + min_diag_length;
7165 		/* Extend each hit in the linked list */
7166 		do {
7167 #ifdef BLAST_COLLECT_STATS
7168 		    number_of_hits++;
7169 #endif
7170 		    q_off = hinfo_get_pos(hit_info);
7171 		    context = hinfo_get_context(hit_info);
7172 		    num_hits--;
7173 		    hit_info = *((Uint4 *) lookup_pos); /* load next hit_info */
7174 		    lookup_pos++;
7175 
7176 		    diag = diag_tmp - q_off;
7177 
7178 		    real_diag = diag & min_diag_mask;
7179 		    ewp=search->context[context].ewp;
7180 		    combo_array = ewp->combo_array;
7181 
7182 		    if (combo_array[real_diag].diag_level > (s_off+offset))
7183 		    {
7184 			continue;
7185 		    }
7186 		    if (!(search->positionBased)) {
7187 		      if (BlastWordExtend(search, q_off, s_off, word_width, diag, real_diag, &succeed_to_right, context) != 0)
7188 			goto ErrorReturn;
7189 		    }
7190 		    else {
7191 		      if (BlastNewWordExtend(search, q_off, s_off, word_width, diag, real_diag, &succeed_to_right, context) != 0)
7192 			goto ErrorReturn;
7193                     }
7194 		} while (num_hits>0);
7195 	   }
7196 	}
7197 	else	/* only one context. */
7198 	{
7199 	   ewp=search->context[search->first_context].ewp;
7200 	   combo_array = ewp->combo_array;
7201 	   for (;;)
7202 	   {
7203 		do {
7204 			/* lookup a contiguous word. */
7205         		lookup_index = (((lookup_index) & mask)<<char_size);
7206 			s++;
7207         		lookup_index += *s;
7208 			if (s == s_end)
7209 				goto NormalReturn;
7210 		} while (mod_lt[lookup_index].num_used == 0);
7211 
7212 
7213 		num_hits = mod_lt[lookup_index].num_used;
7214 		lookup_pos = mod_lt[lookup_index].entries;
7215 		hit_info = *((Uint4 *) lookup_pos);
7216 		lookup_pos++;
7217 
7218                 if(num_hits > 3){
7219                     if(search->pbp->is_rps_blast) {
7220                 	lookup_pos = (ModLookupPositionPtr) ((Uint1Ptr) lookup->mod_lookup_table_memory + (Uint4) *lookup_pos);
7221                     } else {
7222                 	lookup_pos=*((ModLookupPositionPtr PNTR) lookup_pos);
7223                     }
7224 		}
7225 
7226 		s_off = s-subject0;
7227 		diag_tmp = s_off + min_diag_length;
7228 		/* Extend each hit in the linked list */
7229 		do {
7230 #ifdef BLAST_COLLECT_STATS
7231 		    number_of_hits++;
7232 #endif
7233 		    q_off = hinfo_get_pos(hit_info);
7234 		    num_hits--;
7235 		    hit_info = *((Uint4 *) lookup_pos); /* load next hit_info */
7236 		    lookup_pos++;
7237 
7238 		    diag = diag_tmp - q_off;
7239 		    real_diag = diag & min_diag_mask;
7240 		    if (combo_array[real_diag].diag_level > (s_off+offset))
7241 		    {
7242 			continue;
7243 		    }
7244 		    if (!(search->positionBased)) {
7245 		      if (BlastWordExtend(search, q_off, s_off, word_width, diag, real_diag, &succeed_to_right, 0) != 0)
7246 			goto ErrorReturn;
7247 		    }
7248 		    else {
7249 		      if (BlastNewWordExtend(search, q_off, s_off, word_width, diag, real_diag, &succeed_to_right, 0) != 0)
7250 			goto ErrorReturn;
7251 		    }
7252 		} while (num_hits>0);
7253 	   }
7254 	}
7255 
7256 NormalReturn:
7257 	if (search->prelim)
7258 		search->first_pass_hits += number_of_hits;
7259 	else
7260 		search->second_pass_hits += number_of_hits;
7261 	BlastExtendWordExit(search);
7262 	return search->current_hitlist->hspcnt;
7263 
7264 ErrorReturn:
7265 	BlastExtendWordExit(search);
7266 	return 3;
7267 }
7268 
7269 /***************************************************************************
7270 *	This function is called once for each subject sequence.
7271 *
7272 *	New (experimental) version of the Word Finder that makes use of
7273 *	an idea of Stephen Altschul's.  Multiple hits are found before a
7274 *	hit is extended.
7275 
7276 *	"diagpos" is an Int4 array that is as long as the query sequence
7277 *	and the longest database sequence.   An efficient comparison of
7278 *	whether a new hit is in the same window as the last one is done
7279 *	by keeping track of how far along an "imaginary" array (i.e.,
7280 *	increment) one is; this array changes every time this function is
7281 *	called by the subject length plus window.
7282 *
7283 ***************************************************************************/
7284 /*----------   search a sequence with 1 Context, 1 Letter per byte  ---------*/
7285 static Int4
BlastWordFinder_mh(BlastSearchBlkPtr search)7286 BlastWordFinder_mh(BlastSearchBlkPtr search)
7287 {
7288 	BLAST_WordFinderPtr	wfp;
7289 	LookupTablePtr lookup;
7290 	BLAST_ParameterBlkPtr	pbp;
7291 
7292 	pbp = search->pbp;
7293 	if (search->prelim == TRUE)
7294 	{
7295 		wfp=search->wfp_first;
7296 		if (pbp->cutoff_s2_set == TRUE)
7297 			pbp->cutoff_s2 = pbp->cutoff_s2_max;
7298 		else
7299 			pbp->cutoff_s2 = MIN(pbp->cutoff_s_first, pbp->cutoff_s2_max);
7300 		pbp->X = pbp->dropoff_1st_pass;
7301 	}
7302 	else
7303 	{
7304 		wfp=search->wfp_second;
7305 		if (pbp->cutoff_s2_set == TRUE)
7306 			pbp->cutoff_s2 = pbp->cutoff_s2_max;
7307 		else
7308 			pbp->cutoff_s2 = MIN(pbp->cutoff_s_second, pbp->cutoff_s2_max);
7309 		pbp->X = search->pbp->dropoff_2nd_pass;
7310 	}
7311 
7312 	lookup = wfp->lookup;
7313 
7314 	if (search->prog_number == blast_type_blastn)
7315 	{
7316 		return BlastNtWordFinder_mh(search, lookup);
7317 	}
7318 	else
7319 	{
7320 		return BlastWordFinder_mh_contig(search, lookup);
7321 	}
7322 }
7323 
7324 /****************************************************************************
7325 
7326 	This function scans the database, looking for matches to the words in
7327 	the 'lookup_index'.
7328 
7329 	In order to keep track of how far along a certain diagonal has already
7330 	been extended an Int4 array that is twice as long as the shortest sequence
7331 	is used (actually it is the power of two that is more than twice as long as the
7332 	shortest sequence).  There is a need for a mapping from 'true' diagonals (which would
7333 	be the length of both query and database sequence) to the pseudo-diagonals
7334 	used here (i.e., the Int4 array).  This is done below with the 'version'.
7335 	The procedure is as follows:
7336 
7337 	1.) diag_tmp is calculated with the 'subject' offset + min_diag_length: s_off + min_diag_length
7338 	(min_diag_length is 2**n such that n is large enough to make min_diag_length larger
7339 	than the shorter of the query and database sequence).
7340 
7341 	2.) diag is calculated with diag_tmp - q_off.  This is the 'real' diagonal, except
7342 	for the sum min_diag_length.
7343 
7344 	3.) real_diag is calculated by keeping only those bits in diag that are less than
7345 	min_diag_length-1.  This provides a unique number within a range.
7346 
7347 	4.) the version is calculated by shifting over 'bits_to_shift', which
	corresponds to dividing by min_diag_length.
7349 
7350 	5.) the combination of the version and the 'real_diag' provide a unique location
7351 	for the diagonal.
7352 
7353 
7354 
7355 	modifications (cfj):
7356  	 - changed hash_table entries to reduce cache misses (see comments in lookup.c)
7357 	 - when walking through sequence, precompute next_index and prefetch the entry
7358 	 - combined last_hit/version/diag_level into array of struct for better locality.
7359 	 - eliminated the need for the version[] array by changing the value stored as diag_level.
7360 	     (This is done by measuring diag_level along s (rather than q) -- With this measure,
7361 	     previous hits found in XX[real_diag] will either really be from the same diag, or will
7362 	     have a diag_level and last_hit much smaller (by at least min_diag_length) than the current
7363 	     position.)
7364 
7365 
7366 ******************************************************************************/
7367 
7368 
7369 
/*
    BlastWordFinder_mh_contig -- multiple-hit word finder for contiguous words.

    Scans search->subject->sequence one residue at a time while keeping a
    rolling index into the lookup table.  For every word that has entries
    in lookup->mod_lt, each stored query hit is examined: an extension
    (BlastWordExtend_prelim, or BlastNewWordExtend_prelim when
    search->positionBased is set) is attempted only when the previous hit
    recorded on the same masked diagonal lies at least wordsize and less
    than window positions back, and the diagonal's diag_level does not
    exceed the current position.

    The body consists of four copies of the same loop, selected by
      - whether lookup->pv_array is present (a bit array tested before
        mod_lt is consulted), and
      - whether more than one context is searched.

    Returns:
      0   when the subject is shorter than the word size,
      3   when an extension routine returns non-zero,
      otherwise search->current_hitlist->hspcnt.  When search->prelim is
      set the scan stops as soon as the hitlist holds at least one HSP.
*/
static Int4
BlastWordFinder_mh_contig(BlastSearchBlkPtr search, LookupTablePtr lookup)
{
    register Uint1Ptr	s;
    register Uint1Ptr s_end;
    Uint1Ptr	subject0;
    BLAST_Diag	diag, diag_tmp, real_diag;
    /* NOTE(review): ewp_pointer and ca_ptr are indexed by context number
       (first_context..last_context and the context decoded from each hit)
       with no range check in this function -- confirm that contexts are
       always below 40. */
    BLAST_ExtendWordPtr     ewp, ewp_pointer[40];
    Uint4 q_off;
    register Int4 s_off;
    Uint2 context;
    Int4 diff, offset, s_pos, window;
    Int4 min_diag_length, min_diag_mask;
    Int4 *last_hit_p;
    CfjModStruct *combo_array;
    CfjModStruct *ca_ptr[40];
    register ModLookupPositionPtr lookup_pos;
    register Uint4 hit_info;

    Int4 char_size, lookup_index, mask, wordsize;
    Int4 next_lindex;
    Int4 * next_nhits_addr;
    Int4 word_width, index=0;
    Int8 number_of_hits=0;
    register Int4 num_hits;
    register Int4 next_nhits;

    BLAST_ExtendWordParamsPtr     ewp_params;
    Boolean			prelim, succeed_to_right;
    ModLAEntry *mod_lt=lookup->mod_lt;
    PV_ARRAY_TYPE *pv_array = lookup->pv_array;
    register PV_ARRAY_TYPE PNTR next_pv_array_addr;
    register PV_ARRAY_TYPE next_pv_val,pv_val;

    ewp = NULL;	/* Gets rid of a warning. */

    ewp_params=search->ewp_params;
    prelim = search->prelim;

    /* The word_width tells how "long" a word is; for a contiguous word it's
       the length of the word. */
    word_width = lookup->wordsize;

    /* wordsize holds the same value as word_width; it is the one compared
       against diff below, word_width is the one passed to the extenders. */
    wordsize = lookup->wordsize;
    char_size = lookup->char_size;
    mask = lookup->mask;
    subject0 = s = (Uint1Ptr) search->subject->sequence;

    window = ewp_params->window;
    offset = ewp_params->offset;
    min_diag_length = ewp_params->min_diag_length;
    min_diag_mask = ewp_params->min_diag_mask;

    if (search->current_hitlist == NULL) {
        search->current_hitlist = BlastHitListNew(search);
    } else { /* Scrub the hitlist. */
        if (search->current_hitlist_purge)
            BlastHitListPurge(search->current_hitlist);
    }

    /* subject is too short to find anything! */
    if (word_width > search->subject->length)
        return 0;

    /* Move along string to appropriate starting point. */
    s = lookup_find_init(lookup, &index, s);
    lookup_pos=NULL;
    lookup_index = index;
    /* Determines when to stop scanning the database. */
    s_end = subject0 + search->subject->length;

    /* ---- Variant 1 and 2: a presence-bit array is available. ---- */
    if (pv_array) {
        if ((search->last_context-search->first_context+1) > 1) {
            /* Only used if more than one context. */
            for (index=search->first_context; index<=search->last_context; index++){
                ewp_pointer[index] = search->context[index].ewp;
                ca_ptr[index]=ewp_pointer[index]->combo_array;
            }
            s_off = (Int4) (s - subject0);
            /* Prime the one-word look-ahead: the index of the next word and
               the pv_array element holding its bit are fetched one step
               early. */
            next_lindex = (((lookup_index) & mask)<<char_size) + *(s+1);
            next_pv_val = pv_array[next_lindex>>PV_ARRAY_BTS];

            for (;;) {
                do {
                    /* lookup a contiguous word. */
                    s++;
                    lookup_index = next_lindex;

                    if (s == s_end)
                        goto NormalReturn;

                    /* Reads the residue one position ahead of s. */
                    next_lindex = (((lookup_index) & mask)<<char_size) + *(s+1);
                    next_pv_array_addr = &pv_array[next_lindex>>PV_ARRAY_BTS];
                    pv_val = next_pv_val;
                    next_pv_val = *next_pv_array_addr;

                /* Keep scanning while the bit for the current word is clear. */
                } while ((pv_val&(((PV_ARRAY_TYPE) 1)<<(lookup_index&PV_ARRAY_MASK))) == 0);

                num_hits = mod_lt[lookup_index].num_used;

                /* Changed by TLM. */
                lookup_pos = mod_lt[lookup_index].entries;
                hit_info = *((Uint4 *) lookup_pos);
                lookup_pos++;

                /* With more than three hits the slot after the first entry
                   refers to the remaining hits: as an offset into
                   mod_lookup_table_memory for rps blast, as a pointer
                   otherwise. */
                if(num_hits > 3){
                    if(search->pbp->is_rps_blast) {
                        lookup_pos = (ModLookupPositionPtr) ((Uint1Ptr) lookup->mod_lookup_table_memory + (Uint4) *lookup_pos);
                    } else {
                        lookup_pos=*((ModLookupPositionPtr PNTR) lookup_pos);
                    }
                }
                /* Changed by TLM. */
                s_off = (Int4) (s - subject0);

                s_pos = s_off + offset;
                diag_tmp = s_off + min_diag_length;

                /* Extend each hit in the linked list */
                /* Each link corresponds to different hits on the query sequence */
                do {  /* for each hit */

#ifdef BLAST_COLLECT_STATS
                    number_of_hits++;
#endif
                    q_off = hinfo_get_pos(hit_info);
                    context = hinfo_get_context(hit_info);
                    hit_info = *((Uint4 *) lookup_pos); /* load next hit_info */

                    diag = diag_tmp - q_off;
                    real_diag = (diag_tmp - q_off) & min_diag_mask;
                    /* context-dependent values */
                    combo_array = ca_ptr[context];

                    last_hit_p = &combo_array[real_diag].last_hit;
                    diff = s_pos - *last_hit_p;
                    num_hits--;
                    lookup_pos++;

                    /* diff is always greater than window for the first time in a function. */
                    if (diff >= window) {
                        /* Previous hit is too far back: just remember this one. */
                        *last_hit_p = s_pos;
                    } else if (diff >= wordsize) {
                        /* Second hit inside the window: try to extend. */
                        succeed_to_right = TRUE;
                        if (combo_array[real_diag].diag_level <= (s_off+offset)) {
                            ewp = ewp_pointer[context];
                            ewp->actual_window = diff;
                            if (!(search->positionBased)) {
                                if (BlastWordExtend_prelim(search, q_off, s_off, word_width, diag, real_diag, &succeed_to_right, context) != 0)
				    goto ErrorReturn;
                            } else {
                                if (BlastNewWordExtend_prelim(search, q_off, s_off, word_width, diag,  real_diag, &succeed_to_right, context) != 0)
				    goto ErrorReturn;
                            }
                            /* One HSP is enough to end a preliminary pass. */
                            if (search->current_hitlist->hspcnt > 0 && prelim)
                                goto NormalReturn;

                        }
                        if (succeed_to_right)
                            *last_hit_p = 0;
                        else
                            *last_hit_p = s_pos;
                    }
		} while(num_hits>0); /* end for pos_cnt... */
	    }
	} else { /* Only one context. */

	    ewp=search->context[search->first_context].ewp;
	    combo_array=ewp->combo_array;

            next_lindex = (((lookup_index) & mask)<<char_size) + *(s+1);
            next_pv_val = pv_array[next_lindex>>PV_ARRAY_BTS];

            for (;;) {
	        do {
                    /* lookup a contiguous word. */
                    s++;
                    lookup_index = next_lindex;

                    if (s == s_end)
                        goto NormalReturn;

                    next_lindex = (((lookup_index) & mask)<<char_size) + *(s+1);
                    next_pv_array_addr = &pv_array[next_lindex>>PV_ARRAY_BTS];
                    pv_val = next_pv_val;
                    next_pv_val = *next_pv_array_addr;

		} while ((pv_val&(((PV_ARRAY_TYPE) 1)<<(lookup_index&PV_ARRAY_MASK))) == 0);

		num_hits = mod_lt[lookup_index].num_used;

                /* Changed by TLM. */
		lookup_pos = mod_lt[lookup_index].entries;
		hit_info = *((Uint4 *) lookup_pos);
		lookup_pos++;

		if(num_hits > 3){
                    if(search->pbp->is_rps_blast) {
                        lookup_pos = (ModLookupPositionPtr) ((Uint1Ptr) lookup->mod_lookup_table_memory + (Uint4) *lookup_pos);
                    } else {
                        lookup_pos=*((ModLookupPositionPtr PNTR) lookup_pos);
                    }
		}

                /* Changed by TLM. */
		s_off = (Int4) (s - subject0);
		s_pos = s_off + offset;
		diag_tmp = s_off + min_diag_length;

                /* Extend each hit in the linked list */
                /* Each link corresponds to different hits on the query sequence */
		do {  /* for each hit */

#ifdef BLAST_COLLECT_STATS
                    number_of_hits++;
#endif
                    /* Changed by TLM. */
                    /* NOTE(review): the raw hit_info is used as the query
                       offset here, whereas the multi-context loops decode
                       it with hinfo_get_pos -- presumably equivalent when
                       only one context exists; confirm. */
                    q_off = hit_info;
                    num_hits--;
                    hit_info = *((Uint4 *) lookup_pos); /* load next hit_info */
                    lookup_pos++;

                    diag = diag_tmp - q_off;
                    real_diag = diag & min_diag_mask;

                    last_hit_p = &combo_array[real_diag].last_hit;
                    diff = s_pos - *last_hit_p;


                    /* diff is always greater than window for the first time in a function. */
                    if (diff >= window) {
                        *last_hit_p = s_pos;
                    } else if (diff >= wordsize) {
                        succeed_to_right = TRUE;
                        if (combo_array[real_diag].diag_level <= (s_off+offset)) {
                            ewp->actual_window = diff;
                            /* The extenders are called with context 0 in
                               the single-context loops. */
                            if (!(search->positionBased)) {
                                if (BlastWordExtend_prelim(search, q_off, s_off, word_width, diag, real_diag, &succeed_to_right, 0) != 0)
				    goto ErrorReturn;
                            } else {
                                if (BlastNewWordExtend_prelim(search, q_off, s_off, word_width, diag, real_diag, &succeed_to_right, 0) != 0)
				    goto ErrorReturn;
                            }
                            if (search->current_hitlist->hspcnt > 0 && prelim)
                                goto NormalReturn;

                        }
                        if (succeed_to_right)
                            *last_hit_p = 0;
                        else
                            *last_hit_p = s_pos;
                    }
		} while(num_hits > 0); /* end for pos_cnt... */


            }
	}
    /* ---- Variant 3 and 4: no pv_array; num_used is read from mod_lt
       directly, again one word ahead of the scan position. ---- */
    } else {
	if ((search->last_context-search->first_context+1) > 1) {
            /* Only used if more than one context. */
            for (index=search->first_context; index<=search->last_context; index++){
		ewp_pointer[index] = search->context[index].ewp;
		ca_ptr[index]=ewp_pointer[index]->combo_array;
            }
	    next_lindex = (((lookup_index) & mask)<<char_size) + *(s+1);
	    next_nhits_addr=&mod_lt[next_lindex].num_used ;
	    next_nhits=*next_nhits_addr;
	    s_off = (Int4) (s - subject0);
	    for (;;) {
		do {
                    /* lookup a contiguous word. */
                    lookup_index = next_lindex;
                    s++;

                    if (s == s_end) goto NormalReturn;

                    next_lindex = (((lookup_index) & mask)<<char_size) + *(s+1);

                    next_nhits_addr = &mod_lt[next_lindex].num_used;

                    /* The hit count fetched on the previous step belongs to
                       the word now at lookup_index. */
                    num_hits = next_nhits;
                    next_nhits=*next_nhits_addr;

		} while (num_hits == 0);

                /* Changed by TLM. */
		lookup_pos = mod_lt[lookup_index].entries;
		hit_info = *((Uint4 *) lookup_pos);
		lookup_pos++;

		if(num_hits>3){
                    if(search->pbp->is_rps_blast) {
                        lookup_pos = (ModLookupPositionPtr) ((Uint1Ptr) lookup->mod_lookup_table_memory + (Uint4) *lookup_pos);
                    } else {
                        lookup_pos=*((ModLookupPositionPtr PNTR) lookup_pos);
                    }
		}
                /* Changed by TLM. */
	    	s_off = (Int4) (s - subject0);

		s_pos = s_off + offset;
		diag_tmp = s_off + min_diag_length;

                /* Extend each hit in the linked list */
                /* Each link corresponds to different hits on the query sequence */
 		/* printf(" dtmp:%3d     ",diag_tmp); */
		do{  /* for each hit */

#ifdef BLAST_COLLECT_STATS
                    number_of_hits++;
#endif
                    q_off = hinfo_get_pos(hit_info);
                    context = hinfo_get_context(hit_info);
                    hit_info = *((Uint4 *) lookup_pos); /* load next hit_info */

                    diag = diag_tmp - q_off;
                    real_diag = (diag_tmp - q_off) & min_diag_mask;
                    /* context-dependent values */
                    combo_array = ca_ptr[context];

                    last_hit_p = &combo_array[real_diag].last_hit;
                    diff = s_pos - *last_hit_p;
                    num_hits--;
                    lookup_pos++;

                    /* diff is always greater than window for the first time in a function. */
                    if (diff >= window) {
                        *last_hit_p = s_pos;
                    } else if (diff >= wordsize) {
                        succeed_to_right = TRUE;
                        if (combo_array[real_diag].diag_level <= (s_off+offset)) {
                            ewp = ewp_pointer[context];
                            ewp->actual_window = diff;
                            if (!(search->positionBased)) {
                                if (BlastWordExtend_prelim(search, q_off, s_off, word_width, diag, real_diag, &succeed_to_right, context) != 0)
				    goto ErrorReturn;
                            } else {
                                if (BlastNewWordExtend_prelim(search, q_off, s_off, word_width, diag, real_diag, &succeed_to_right, context) != 0)
				    goto ErrorReturn;
                            }
                            if (search->current_hitlist->hspcnt > 0 && prelim)
                                goto NormalReturn;

                        }
                        if (succeed_to_right)
                            *last_hit_p = 0;
                        else
                            *last_hit_p = s_pos;
                    }
		} while(num_hits>0); /* end for pos_cnt... */
	    }
	} else { /* Only one context. */

	    ewp=search->context[search->first_context].ewp;
	    combo_array=ewp->combo_array;

	    next_lindex = (((lookup_index) & mask)<<char_size) + *(s+1);
            /* CHanged by TLM.
               next_nhits_addr=&mod_lt[next_lindex].num_used ;
               next_nhits=*next_nhits_addr;
            */
	    next_nhits=mod_lt[next_lindex].num_used ;
	    for (;;) {
		do {
                    /* lookup a contiguous word. */
                    lookup_index = next_lindex;
                    s++;
                    if (s == s_end) goto NormalReturn;

                    next_lindex = (((lookup_index) & mask)<<char_size) + *(s+1);
                    /* CHanged by TLM.
                       next_nhits_addr = &mod_lt[next_lindex].num_used;
                    */

                    num_hits = next_nhits;
                    next_nhits=mod_lt[next_lindex].num_used;

		} while (num_hits == 0);


                /* Changed by TLM. */
		lookup_pos = mod_lt[lookup_index].entries;
		hit_info = *((Uint4 *) lookup_pos);
		lookup_pos++;

		if(num_hits > 3){
                    if(search->pbp->is_rps_blast) {
                        lookup_pos = (ModLookupPositionPtr) ((Uint1Ptr) lookup->mod_lookup_table_memory + (Uint4) *lookup_pos);
                    } else {
                        lookup_pos=*((ModLookupPositionPtr PNTR) lookup_pos);
                    }
		}

                /* Changed by TLM. */
		s_off = (Int4) (s - subject0);
		s_pos = s_off + offset;
		diag_tmp = s_off + min_diag_length;

                /* Extend each hit in the linked list */
                /* Each link corresponds to different hits on the query sequence */
		do {  /* for each hit */

#ifdef BLAST_COLLECT_STATS
                    number_of_hits++;
#endif
                    /* Changed by TLM. */
                    /* NOTE(review): raw hit_info used as the query offset,
                       as in the other single-context loop -- confirm. */
                    q_off = hit_info;
                    num_hits--;
                    hit_info = *((Uint4 *) lookup_pos); /* load next hit_info */
                    lookup_pos++;

                    diag = diag_tmp - q_off;
                    real_diag = diag & min_diag_mask;

                    last_hit_p = &combo_array[real_diag].last_hit;
                    diff = s_pos - *last_hit_p;

/* diff is always greater than window for the first time in a function. */
                    if (diff >= window) {
                        *last_hit_p = s_pos;
                    } else if (diff >= wordsize) {
                        succeed_to_right = TRUE;
                        if (combo_array[real_diag].diag_level <= (s_off+offset)) {
                            ewp->actual_window = diff;
                            if (!(search->positionBased)) {
                                if (BlastWordExtend_prelim(search, q_off, s_off, word_width, diag, real_diag, &succeed_to_right, 0) != 0)
				    goto ErrorReturn;
                            } else {
                                if (BlastNewWordExtend_prelim(search, q_off, s_off, word_width, diag, real_diag, &succeed_to_right, 0) != 0)
				    goto ErrorReturn;
                            }
                            if (search->current_hitlist->hspcnt > 0 && prelim)
                                goto NormalReturn;

                        }
                        if (succeed_to_right)
                            *last_hit_p = 0;
                        else
                            *last_hit_p = s_pos;
                    }
		} while(num_hits>0); /* end for pos_cnt... */
            }  /* for(;;) */
	}
    }

 NormalReturn:
    /* number_of_hits is only counted when BLAST_COLLECT_STATS is defined. */
    if (search->prelim)
        search->first_pass_hits += number_of_hits;
    else
        search->second_pass_hits += number_of_hits;
    BlastExtendWordExit(search);
    return search->current_hitlist->hspcnt;

 ErrorReturn:
    BlastExtendWordExit(search);
    return 3;
}
7827 
7828 /* BlastWordExtend -- extend a word-sized hit to a longer match */
7829 static Int2
BlastWordExtend(BlastSearchBlkPtr search,Int4 q_off,Int4 s_off,Int4 word_width,BLAST_Diag diag,BLAST_Diag real_diag,Boolean PNTR succeed_to_right,Int2 context)7830 BlastWordExtend(BlastSearchBlkPtr search, Int4 q_off, Int4 s_off, Int4 word_width, BLAST_Diag diag, BLAST_Diag real_diag, Boolean PNTR succeed_to_right, Int2 context)
7831 {
7832 	BLAST_ExtendWordPtr     ewp;
7833 	BLAST_ParameterBlkPtr	pbp;
7834 	BLAST_ScoreBlkPtr	sbp;
7835 	BLAST_Score		leftsum, rightsum, rightscore, leftscore;
7836 	Uint1Ptr		query;
7837 	register Uint1Ptr	q, s;
7838 	register Uint1Ptr	q_right, q_left, s_left, q_best_right, q_best_left;
7839 	register BLAST_Score	score, sum;
7840 	register BLAST_ScorePtr PNTR	matrix;
7841 	register BLAST_Score	x, X;
7842 
7843 
7844 	q_best_left = NULL;	/* Gets rid of warning. */
7845 	q_best_right = NULL;	/* Gets rid of warning. */
7846 
7847 #ifdef BLAST_COLLECT_STATS
7848 	if (search->prelim)
7849 		search->first_pass_extends++;
7850 	else
7851 		search->second_pass_extends++;
7852 #endif
7853 
7854 	*succeed_to_right = FALSE;
7855 
7856 	ewp=search->context[context].ewp;
7857 
7858 	diag -= search->ewp_params->min_diag_length;
7859 
7860 	sbp=search->sbp;
7861 	pbp=search->pbp;
7862 
7863 	query = search->context[context].query->sequence;
7864 	q = query + q_off;
7865 	s = search->subject->sequence + s_off;
7866 
7867 	X=pbp->X;
7868 	matrix = sbp->matrix;
7869 
7870 	score=0;
7871 	sum = 0;
7872 	q_left = q - word_width;
7873 	q_right = q;
7874 
7875 /* Look for the highest scoring region in the initial word. */
7876 	while (q > q_left)
7877 	{
7878 		if ((sum += matrix[*q][*s]) > score)
7879 		{
7880 			score = sum;
7881 			q_best_right = q_right;
7882 			q_best_left = q;
7883 		}
7884 		else if (sum <= 0)
7885 		{
7886 			sum = 0;
7887 			q_right = q-1;
7888 		}
7889 		q--; s--;
7890 	}
7891 
7892 	leftsum = rightsum = rightscore = 0;
7893 
7894 /* q_left is the where the "attempted" extension along the query was
7895 stopped (and may be picked up again if the "goto Extend_Left" is used).
7896 q_best_left is the "best" extension along the query that should be
7897 reported. Analogous logic applies to q_right and q_best_right. */
7898 
7899 	q_left = q_best_left;
7900 	q_right = q_best_right;
7901 
7902 	q = q_left;
7903 	s = search->subject->sequence + (q - query) + diag;
7904 	sum = leftsum;
7905 
7906         x = X;
7907 	while (sum > x)
7908 	{
7909 		q--; s--;
7910 		if ((sum += matrix[*q][*s]) > 0)
7911 		{
7912 			do {
7913 				score += sum;
7914 				q_best_left = q;
7915 				q--; s--;
7916 			} while ((sum = matrix[*q][*s]) > 0);
7917 		}
7918 	}
7919 
7920 	if (score > rightscore && rightsum > X && -rightscore > X)
7921 	{
7922 		leftscore = score;
7923 		leftsum = sum;
7924 		q_left = q;
7925 
7926 		q = q_right;
7927 		s = search->subject->sequence + (q - query) + diag;
7928 		sum = rightsum;
7929 
7930 /* "score" is actually the "maxscore", if sum drops by "score", then the
7931 total new score is zero and the extension can stop. */
7932 		if ((x = -score) < X)
7933 			x = X;
7934 
7935 		while (sum > x)
7936 		{
7937 			q++; s++;
7938 			if ((sum += matrix[*q][*s]) > 0)
7939 			{
7940 				do {
7941 					score += sum;
7942 					q_best_right = q;
7943 					q++; s++;
7944 				} while ((sum = matrix[*q][*s]) > 0);
7945 				/* do this if score changes. */
7946 				if ((x = -score) < X)
7947 					x = X;
7948 			}
7949 		}
7950 
7951 		q_right = q;
7952 	}
7953 
7954 	/* Record how far this diagonal has been traversed,
7955 	"q_right" was the last position on the query sequence.
7956 	ewp_params->offset is added to provide the proper "zero-point" */
7957 	ewp->combo_array[real_diag].diag_level = q_right - query - q_off + word_width + s_off + search->ewp_params->offset;
7958 
7959 	if (score >= pbp->cutoff_s2) /* Score is reportable */
7960 	{
7961 
7962 #ifdef BLAST_COLLECT_STATS
7963 		if (search->prelim)
7964 			search->first_pass_good_extends++;
7965 		else
7966 			search->second_pass_good_extends++;
7967 #endif
7968 		s_left = search->subject->sequence + (q_best_left - query) + diag;
7969 		BlastSaveCurrentHsp(search, score, (q_best_left-query), (s_left-search->subject->sequence), (q_best_right-q_best_left+1), context);
7970 	}
7971 
7972 	return 0;
7973 }
7974 /*AAS*/
7975 /* BlastWordExtend -- extend a word-sized hit to a longer match,
7976    BlastNewWordExtend is position based */
7977 static Int2
BlastNewWordExtend(BlastSearchBlkPtr search,Int4 q_off,Int4 s_off,Int4 word_width,BLAST_Diag diag,BLAST_Diag real_diag,Boolean PNTR succeed_to_right,Int2 context)7978 BlastNewWordExtend(BlastSearchBlkPtr search, Int4 q_off, Int4 s_off, Int4 word_width, BLAST_Diag diag, BLAST_Diag real_diag, Boolean PNTR succeed_to_right, Int2 context)
7979 {
7980 	BLAST_ExtendWordPtr     ewp;
7981 	BLAST_ParameterBlkPtr	pbp;
7982 	BLAST_Score		leftsum, rightsum, rightscore, leftscore;
7983 	Uint1Ptr		query;
7984 	register Uint1Ptr	q, s;
7985 	register Uint1Ptr	q_right, q_left, s_left, q_best_right, q_best_left;
7986 	register BLAST_Score	score, sum;
7987 	register BLAST_Score	x, X;
7988 
7989 
7990 #ifdef BLAST_COLLECT_STATS
7991 	if (search->prelim)
7992 		search->first_pass_extends++;
7993 	else
7994 		search->second_pass_extends++;
7995 #endif
7996 
7997 	*succeed_to_right = FALSE;
7998 
7999 	ewp=search->context[context].ewp;
8000 
8001 	diag -= search->ewp_params->min_diag_length;
8002 
8003 	pbp=search->pbp;
8004 
8005 	query = search->context[context].query->sequence;
8006 	q = query + q_off;
8007 	s = search->subject->sequence + s_off;
8008 
8009 	X=pbp->X;
8010 
8011 	score=0;
8012 	sum = 0;
8013 	q_left = q - word_width;
8014 	q_right = q;
8015         q_best_left = q;
8016 	q_best_right = q; /*AAS*/
8017 
8018 /* Look for the highest scoring region in the initial word. */
8019 	while (q > q_left)
8020 	{
8021 		if ((sum += MtrxScorePosSearch(search->sbp,
8022 				(Int4) (q - query),*s)) > score)
8023 		{
8024 			score = sum;
8025 			q_best_right = q_right;
8026 			q_best_left = q;
8027 		}
8028 		else if (sum <= 0)
8029 		{
8030 			sum = 0;
8031 			q_right = q-1;
8032 		}
8033 		q--; s--;
8034 	}
8035 
8036 	if ((x = -score) < X)
8037 		x = X;
8038 
8039 	leftsum = rightsum = rightscore = 0;
8040 
8041 /* q_left is the where the "attempted" extension along the query was
8042 stopped (and may be picked up again if the "goto Extend_Left" is used).
8043 q_best_left is the "best" extension along the query that should be
8044 reported. Analogous logic applies to q_right and q_best_right. */
8045 
8046 	q_left = q_best_left;
8047 	q_right = q_best_right;
8048 
8049 	q = q_left;
8050 	s = search->subject->sequence + (q - query) + diag;
8051 	sum = leftsum;
8052 	x = X;
8053 
8054 	do
8055 	{
8056 		q--; s--;
8057 		if (((q -query) >=0) &&
8058 		    (sum += MtrxScorePosSearch(search->sbp,
8059 				(Int4) (q - query),*s)) > 0)
8060 		{
8061 			do {
8062 				score += sum;
8063 				q_best_left = q;
8064 				q--; s--;
8065 			} while (((q -query) >= 0) &&
8066 			   ((sum = MtrxScorePosSearch(search->sbp,
8067 					(Int4) (q - query),*s)) > 0));
8068 		}
8069 	} while (((q -query) >= 0) && (sum >= x));
8070 
8071 
8072 	if (score > rightscore && rightsum > X && -rightscore > X)
8073 	{
8074 		leftscore = score;
8075 		leftsum = sum;
8076 		q_left = q;
8077 
8078 		q = q_right;
8079 		s = search->subject->sequence + (q - query) + diag;
8080 		sum = rightsum;
8081 
8082 /* "score" is actually the "maxscore", if sum drops by "score", then the
8083 total new score is zero and the extension can stop. */
8084 		if ((x = -score) < X)
8085 			x = X;
8086 
8087 		do
8088 		{
8089 			q++; s++;
8090 			if ((sum += MtrxScorePosSearch(search->sbp,
8091 					(Int4) (q - query),*s)) > 0)
8092 			{
8093 				do {
8094 					score += sum;
8095 					q_best_right = q;
8096 					q++; s++;
8097 				} while ((sum = MtrxScorePosSearch(search->sbp,
8098 					(Int4) (q - query),*s)) > 0);
8099 				/* do this if score changes. */
8100 				if ((x = -score) < X)
8101 					x = X;
8102 			}
8103 		} while (sum >= x);
8104 
8105 		q_right = q;
8106 	}
8107 
8108 	/* Record how far this diagonal has been traversed,
8109 	"q_right" was the last position on the query sequence.
8110 	ewp_params->offset is added to provide the proper "zero-point" */
8111 	ewp->combo_array[real_diag].diag_level = q_right - query - q_off + word_width + s_off + search->ewp_params->offset;
8112 
8113 	if (score >= pbp->cutoff_s2) /* Score is reportable */
8114 	{
8115 
8116 #ifdef BLAST_COLLECT_STATS
8117 		if (search->prelim)
8118 			search->first_pass_good_extends++;
8119 		else
8120 			search->second_pass_good_extends++;
8121 #endif
8122 		s_left = search->subject->sequence + (q_best_left - query) + diag;
8123 		BlastSaveCurrentHsp(search, score, (q_best_left-query), (s_left-search->subject->sequence), (q_best_right-q_best_left+1), context);
8124 	}
8125 
8126 	return 0;
8127 }
8128 
8129 
8130 
8131 /* BlastWordExtend_prelim -- for timing purposes. */
8132 static Int2
BlastWordExtend_prelim(BlastSearchBlkPtr search,Int4 q_off,Int4 s_off,Int4 word_width,BLAST_Diag diag,BLAST_Diag real_diag,Boolean PNTR succeed_to_right,Int2 context)8133 BlastWordExtend_prelim(BlastSearchBlkPtr search, Int4 q_off, Int4 s_off, Int4 word_width, BLAST_Diag diag, BLAST_Diag real_diag, Boolean PNTR succeed_to_right, Int2 context)
8134 {
8135 	BLAST_ExtendWordPtr     ewp;
8136 	BLAST_ParameterBlkPtr	pbp;
8137 	BLAST_ScoreBlkPtr	sbp;
8138 	register Uint1Ptr	q, s, query;
8139 	register Uint1Ptr	q_right, q_left, s_left, q_best_right, q_best_left;
8140 	register BLAST_Score	score, sum;
8141 	register BLAST_ScorePtr PNTR	matrix;
8142 	register BLAST_Score	x, X;
8143 	Uint4 query_num; /* AM: Support for query multiplexing. */
8144 
8145 
8146 
8147 	q_best_left = NULL;	/* Gets rid of warning. */
8148 	q_best_right = NULL;	/* Gets rid of warning. */
8149 
8150 #ifdef BLAST_COLLECT_STATS
8151 	if (search->prelim)
8152 		search->first_pass_extends++;
8153 	else
8154 		search->second_pass_extends++;
8155 #endif
8156 
8157 	*succeed_to_right = FALSE;
8158 
8159 	ewp=search->context[context].ewp;
8160 
8161 	diag -= search->ewp_params->min_diag_length;
8162 
8163 	sbp=search->sbp;
8164 	pbp=search->pbp;
8165 
8166 	query = search->context[context].query->sequence;
8167 	q = query + q_off;
8168 	s =  search->subject->sequence + s_off;
8169 
8170         /* AM: Support for query multiplexing. */
8171 	if( search->prog_number == blast_type_tblastn && search->mult_queries )
8172 	{
8173 	  query_num = GetQueryNum( search->mult_queries, q_off - word_width + 1,
8174 	                           q_off + 1, 0 );
8175           X = search->mult_queries->dropoff_2nd_pass_array[query_num];
8176 	}
8177 	else X=pbp->X;
8178 
8179 	matrix = sbp->matrix;
8180 
8181 	score=0;
8182 	sum = 0;
8183 	q_left = q - word_width;
8184 	q_right = q;
8185 
8186 /* Look for the highest scoring region in the initial word. */
8187 	while (q > q_left)
8188 	{
8189 		sum += matrix[*q][*s];
8190 		if (sum > score)
8191 		{
8192 			score = sum;
8193 			q_best_right = q_right;
8194 			q_best_left = q;
8195 		}
8196 		else if (sum <= 0)
8197 		{
8198 			sum = 0;
8199 			q_right = q-1;
8200 		}
8201 		q--; s--;
8202 	}
8203 
8204 	q = q_left = q_best_left;
8205 	s = s_left = search->subject->sequence + (q_left - query) + diag;
8206 
8207 	q_left--;
8208 
8209 /******************************************************************
8210 
8211 The extension procedure used here is to:
8212 
8213 1.) keep on extending as long as it increases the total score so far, record this
8214 maximum score and the corresponding extents as each new maximum score is reached.
8215 
8216 2.) if extending decreases the total score so far then keep on extending
8217 until the score has dropped by "X" from the last maximum score to explore
8218 whether it is only a local minima that has been encountered:
8219 
8220 	a.) if the score drops by "X" from the last maximum score, then stop
8221 	the extension and record the last maximum score as well as the
8222 	corresponding extents for query and subject.
8223 
8224 	b.) if the score recovers again and becomes higher than the last maximum
8225 	score, reset the maximum score so far as well as the corresponding
8226 	query and subject offsets.
8227 
8228 
8229 3.) When the end of a sequence (either query or subject) is encountered record the last
8230 maximum score as well as the corresponding extents.
8231 
8232 
8233 
8234 In the "while" loop below the maximum score is the variable "score" and "sum"
8235 is the change since the maximum score was last recorded (i.e., the variable
8236 "score" was modified).
8237 
8238 Both x and X are negative and the outer "while" loops continues
8239 as long as sum is less negative than x.  Iterations of the "while"
8240 loop with "sum" containing a negative value corresponds to 2.) above.
8241 
8242 The inner do-while loop is executed only as long as each extension
8243 increases the maximum score, corresponding to 1.) above.
8244 
8245 There is no explicit check for the end of a sequence here, but
8246 between sequences in the blast database there is a "sentinel"
8247 byte.  If this sentinel byte is encountered then matrix[*q][*s]
8248 will be much more negative than "X" so that the extension will
8249 stop.  This corresponds to 3.) above.
8250 
8251 *******************************************************************/
8252 
8253 	sum = 0;
8254 	x = X;
8255 	while (sum > x)
8256 	{
8257 		q--; s--;
8258 		if ((sum += matrix[*q][*s]) > 0)
8259 		{
8260 			do {
8261 				score += sum;
8262 				q--; s--;
8263 			} while ((sum = matrix[*q][*s]) > 0);
8264 			q_left = q;
8265 		}
8266 	}
8267 	/* Adjust for extra decrement in do-while loop above. */
8268 	q_left++;
8269 	s_left = search->subject->sequence + (q_left - query) + diag;
8270 
8271 
8272 /* Extend towards the right (for this preliminary run) if
8273 q_off - q_left is greater than the window. */
8274 	if (((query+q_off)-q_left) >= ewp->actual_window)
8275 	{
8276 		*succeed_to_right = TRUE;
8277 		q = q_right = q_best_right;
8278 		s = search->subject->sequence + (q - query) + diag;
8279 		sum = 0;
8280                 q_right++;  /* pre-increment in case while() loop doesn't run */
8281 
8282 /**************************************************************
8283 
8284 The extension to the right is performed in the same way as the extension
8285 to the left, except that the extension can stop if the score
8286 drops by X or becomes negative, in which case the last maximum score
8287 is recorded.
8288 
8289 *****************************************************************/
8290 		if ((x = -score) < X)
8291 			x = X;
8292 		while (sum > x)
8293 		{
8294 			q++; s++;
8295 			if ((sum += matrix[*q][*s]) > 0)
8296 			{
8297 				do {
8298 					score += sum;
8299 					q++; s++;
8300 				} while ((sum = matrix[*q][*s]) > 0);
8301 				q_right = q;
8302 				/* do this if score changes. */
8303 				if ((x = -score) < X)
8304 					x = X;
8305 			}
8306 		}
8307 		/* Adjust for extra increment in do-while loop above. */
8308 		q_right--;
8309 	}
8310 
8311 	/* Record how far this diagonal has been traversed,
8312 	"q" was the last position on the query sequence.
8313 	ewp->offset is added to provide the proper "zero-point" */
8314 	ewp->combo_array[real_diag].diag_level = q - query - q_off + word_width + s_off + search->ewp_params->offset;
8315 
8316 	if (score >= pbp->cutoff_s2) /* Score is reportable */
8317 	{
8318 
8319 #ifdef BLAST_COLLECT_STATS
8320 		if (search->prelim)
8321 			search->first_pass_good_extends++;
8322 		else
8323 			search->second_pass_good_extends++;
8324 #endif
8325 
8326 		BlastSaveCurrentHsp(search, score, (q_left-query), (s_left-search->subject->sequence), (q_right-q_left+1), context);
8327 	}
8328 
8329 	return 0;
8330 }
8331 
8332 /*AAS*/
8333 /* BlastWordExtend_prelim -- for timing purposes. */
8334 static Int2
BlastNewWordExtend_prelim(BlastSearchBlkPtr search,Int4 q_off,Int4 s_off,Int4 word_width,BLAST_Diag diag,BLAST_Diag real_diag,Boolean PNTR succeed_to_right,Int2 context)8335 BlastNewWordExtend_prelim(BlastSearchBlkPtr search, Int4 q_off, Int4 s_off, Int4 word_width, BLAST_Diag diag, BLAST_Diag real_diag, Boolean PNTR succeed_to_right, Int2 context)
8336 {
8337 	BLAST_ExtendWordPtr     ewp;
8338 	BLAST_ParameterBlkPtr	pbp;
8339 	register Uint1Ptr	q, s, query;
8340 	register Uint1Ptr	q_right, q_left, s_left, q_best_right, q_best_left;
8341 	register BLAST_Score	score, sum;
8342 	register BLAST_Score	x, X;
8343 
8344 
8345 
8346 #ifdef BLAST_COLLECT_STATS
8347 	if (search->prelim)
8348 		search->first_pass_extends++;
8349 	else
8350 		search->second_pass_extends++;
8351 #endif
8352 
8353 	*succeed_to_right = FALSE;
8354 
8355 	ewp=search->context[context].ewp;
8356 
8357 	diag -= search->ewp_params->min_diag_length;
8358 
8359 	pbp=search->pbp;
8360 
8361 	query = search->context[context].query->sequence;
8362 	q = query + q_off;
8363 	s = search->subject->sequence + s_off;
8364 
8365 	X=pbp->X;
8366 
8367 	score=0;
8368 	sum = 0;
8369 	q_left = q - word_width;
8370 	q_right = q+1;
8371         q_best_left = q;
8372         q_best_right = q; /*AAS*/
8373 
8374 /* Look for the highest scoring region in the initial word. */
8375 	while (q > q_left)
8376 	{
8377 		sum += MtrxScorePosSearch(search->sbp,(Int4) (q - query),*s);
8378 		if (sum > score)
8379 		{
8380 			score = sum;
8381 			q_best_right = q_right;
8382 			q_best_left = q;
8383 		}
8384 		else if (sum <= 0)
8385 		{
8386 			sum = 0;
8387 			q_right = q;
8388 		}
8389 		q--; s--;
8390 	}
8391 
8392 	q = q_left = q_best_left;
8393 	s = s_left = search->subject->sequence + (q_left - query) + diag;
8394 
8395 	q_left--;
8396 
8397 	sum = 0;
8398 	x = X;
8399 	while (((q - query) >= 0) && (sum > x))
8400 	{
8401 		q--; s--;
8402 		if (((q - query) >= 0) &&
8403 		    ((sum += MtrxScorePosSearch(search->sbp,
8404 					(Int4) (q - query),*s)) > 0))
8405 		{
8406 			do {
8407 				score += sum;
8408 				q--; s--;
8409 			} while (((q -query) >= 0) &&
8410 				 ((sum = MtrxScorePosSearch(search->sbp,
8411 					(Int4) ( q- query),*s)) > 0));
8412 			q_left = q;
8413 		}
8414 	}
8415 	/* Adjust for extra decrement in do-while loop above. */
8416 	q_left++;
8417 	s_left = search->subject->sequence + (q_left - query) + diag;
8418 
8419 /* Extend towards the right (for this preliminary run) if
8420 q_off - q_left is greater than the window. */
8421 	if (((query+q_off)-q_left) >= ewp->actual_window)
8422 	{
8423 		*succeed_to_right = TRUE;
8424 		q = q_right = q_best_right;
8425 		q--;
8426 		s = search->subject->sequence + (q - query) + diag;
8427 		sum = 0;
8428 
8429 /* "score" is actually the "maxscore", if sum drops by "score", then the
8430 total new score is zero and the extension can stop. */
8431 		if ((x = -score) < X)
8432 			x = X;
8433 		while (sum > x)
8434 		{
8435 			q++; s++;
8436 			if ((sum += MtrxScorePosSearch(search->sbp,
8437 					(Int4) (q - query),*s)) > 0)
8438 			{
8439 				do {
8440 					score += sum;
8441 					q++; s++;
8442 				} while ((sum = MtrxScorePosSearch(search->sbp,
8443 						(Int4) (q - query),*s)) > 0);
8444 				q_right = q;
8445 				/* do this if score changes. */
8446 				if ((x = -score) < X)
8447 					x = X;
8448 			}
8449 		}
8450 		/* Adjust for extra increment in do-while loop above. */
8451 		q_right--;
8452 	}
8453 
8454 	/* Record how far this diagonal has been traversed,
8455 	"q" was the last position on the query sequence.
8456 	ewp->offset is added to provide the proper "zero-point" */
8457 	ewp->combo_array[real_diag].diag_level = q - query -q_off + word_width + s_off + search->ewp_params->offset;
8458 
8459 	if (score >= pbp->cutoff_s2) /* Score is reportable */
8460 	{
8461 
8462 #ifdef BLAST_COLLECT_STATS
8463 		if (search->prelim)
8464 			search->first_pass_good_extends++;
8465 		else
8466 			search->second_pass_good_extends++;
8467 #endif
8468 
8469 		BlastSaveCurrentHsp(search, score, (q_left-query), (s_left-search->subject->sequence), (q_right-q_left+1), context);
8470 	}
8471 
8472 	return 0;
8473 }
8474 
8475 
8476 /* Ungapped extension a blastn type word hit, to be used in Mega BLAST with
8477    discontiguous word models.
8478 
8479 	BlastSearchBlkPtr search: main BLAST structure,
8480 	Int4 q_off: offset of query sequence,
8481 	Int4 s_off: offset of subject sequence, divided by four!
8482         Return: true if ungapped score below cutoff (to indicate that this
8483                 HSP should be deleted.
8484 */
8485 
8486 Boolean
BlastNtWordUngappedExtend(BlastSearchBlkPtr search,Int4 q_off,Int4 s_off,Int4 cutoff)8487 BlastNtWordUngappedExtend(BlastSearchBlkPtr search, Int4 q_off, Int4 s_off,
8488                           Int4 cutoff)
8489 {
8490 	register Uint1Ptr	q;
8491 	register BLAST_ScorePtr PNTR	matrix;
8492 	register BLAST_Score	sum, score;
8493 	Uint1	ch;
8494 	Uint1Ptr query0, subject0, sf, q_beg, q_end, s_beg, s_end, s, start;
8495 	BLAST_Score	X;
8496 	Int2		remainder, base;
8497         BLAST_ParameterBlkPtr   pbp;
8498         BLAST_ScoreBlkPtr       sbp;
8499 	Int4 q_avail, s_avail;
8500 
8501         base = 3 - (s_off % 4);
8502 
8503         sbp=search->sbp;
8504         pbp=search->pbp;
8505 
8506 	matrix = sbp->matrix;
8507 	matrix = sbp->matrix;
8508 	query0 = (Uint1Ptr) search->context[search->first_context].query->sequence;
8509 	subject0 = (Uint1Ptr) search->subject->sequence;
8510         q_avail = search->context[search->first_context].query->length - q_off;
8511         s_avail = search->subject->length - s_off;
8512 
8513 	q = q_beg = q_end = query0 + q_off;
8514 	s = s_beg = s_end = subject0 + s_off/READDB_COMPRESSION_RATIO;
8515 	if (q_off < s_off) {
8516            start = (Uint1Ptr) search->subject->sequence +
8517               (s_off-q_off)/READDB_COMPRESSION_RATIO;
8518            remainder = 3 - ((s_off-q_off)%READDB_COMPRESSION_RATIO);
8519 	} else {
8520            start = (Uint1Ptr) search->subject->sequence;
8521            remainder = 3;
8522 	}
8523 
8524 	/* Find where positive scoring starts & ends within the word hit */
8525 	score = 0;
8526         sum = 0;
8527 
8528 	X = pbp->X;
8529 
8530 	/* extend to the left */
8531 	do {
8532            if (base == 3) {
8533               s--;
8534               base = 0;
8535            } else
8536               base++;
8537            ch = *s;
8538            if ((sum += matrix[*--q][READDB_UNPACK_BASE_N(ch, base)]) > 0) {
8539               q_beg = q;
8540               score += sum;
8541               sum = 0;
8542            } else if (sum < X)
8543               break;
8544 	} while ((s > start) || (s == start && base <= remainder));
8545 
8546         if (score >= cutoff)
8547            return FALSE;
8548 
8549         if (q_avail < s_avail) {
8550            sf = subject0 + (s_off + q_avail)/READDB_COMPRESSION_RATIO;
8551            remainder = 3 - ((s_off + q_avail)%READDB_COMPRESSION_RATIO);
8552         } else {
8553            sf = subject0 + (search->subject->length)/READDB_COMPRESSION_RATIO;
8554            remainder = 3 - ((search->subject->length)%READDB_COMPRESSION_RATIO);
8555         }
8556 	/* extend to the right */
8557 	q = q_end;
8558 	s = s_end;
8559 	sum = 0;
8560         base = 3 - (s_off % 4);
8561 
8562 	while (s < sf || (s == sf && base >= remainder)) {
8563            ch = *s;
8564            if ((sum += matrix[*q++][READDB_UNPACK_BASE_N(ch, base)]) > 0) {
8565               q_end = q;
8566               score += sum;
8567               sum = 0;
8568            } else if (sum < X)
8569               break;
8570            if (base == 0) {
8571               base = 3;
8572               s++;
8573            } else
8574               base--;
8575 	}
8576 
8577         return (score < cutoff);
8578 }
8579 
8580 /* Extend a blastn type word hit.
8581 
8582 	BlastSearchBlkPtr search: main BLAST structure,
8583 	Int4 q_off: offset of query sequence,
8584 	Int4 s_off: offset of subject sequence, divided by four!
8585 	BLAST_Diag real_diag: diagonal,
8586 	Int2 context: must be 0 (plus strand) or 1 (minus strand).
8587 */
8588 Int2
BlastNtWordExtend(BlastSearchBlkPtr search,Int4 q_off,Int4 s_off,BLAST_Diag real_diag,Int2 context)8589 BlastNtWordExtend(BlastSearchBlkPtr search, Int4 q_off, Int4 s_off, BLAST_Diag real_diag, Int2 context)
8590 {
8591 	register Uint1Ptr	q;
8592 	register BLAST_ScorePtr PNTR	matrix;
8593 	register BLAST_Score	sum, score;
8594 	Uint1	ch;
8595 	Uint1Ptr query0, subject0, sf, q_beg, q_end, s_beg, s_end, s, start;
8596 	BLAST_Score	X;
8597 	Int2		remainder;
8598         BLAST_ExtendWordPtr     ewp;
8599         BLAST_ParameterBlkPtr   pbp;
8600         BLAST_ScoreBlkPtr       sbp;
8601 	Int4 q_avail, s_avail;
8602 
8603 #ifdef BLAST_COLLECT_STATS
8604 	search->second_pass_extends++;
8605 #endif
8606 	ewp=search->context[context].ewp;
8607 
8608         sbp=search->sbp;
8609         pbp=search->pbp;
8610 
8611 	matrix = sbp->matrix;
8612 	matrix = sbp->matrix;
8613 	query0 = (Uint1Ptr) search->context[context].query->sequence;
8614 	subject0 = (Uint1Ptr) search->subject->sequence;
8615         q_avail = search->context[context].query->length - q_off;
8616         s_avail = search->subject->length - s_off*READDB_COMPRESSION_RATIO;
8617         if (q_avail < s_avail)
8618         {
8619                 sf = subject0 + s_off + q_avail/READDB_COMPRESSION_RATIO;
8620 		remainder = q_avail%READDB_COMPRESSION_RATIO;
8621         }
8622         else
8623         {
8624                 sf = subject0 + (search->subject->length)/READDB_COMPRESSION_RATIO;
8625 		remainder = (search->subject->length)%READDB_COMPRESSION_RATIO;
8626         }
8627 
8628 	q = q_beg = q_end = query0 + q_off;
8629 	s = s_beg = s_end = subject0 + s_off;
8630 	if (q_off < s_off*READDB_COMPRESSION_RATIO)
8631 	{
8632 		start = (Uint1Ptr) search->subject->sequence + (s_off-q_off/READDB_COMPRESSION_RATIO);
8633 	}
8634 	else
8635 	{
8636 		start = (Uint1Ptr) search->subject->sequence;
8637 	}
8638 
8639 	/* Find where positive scoring starts & ends within the word hit */
8640 	score = sum = 0;
8641 
8642 	X = pbp->X;
8643 
8644 	/* extend to the left */
8645 	do {
8646 		s--;
8647 		ch = *s;
8648 		if ((sum += matrix[*--q][READDB_UNPACK_BASE_4(ch)]) > 0) {
8649 			q_beg = q;
8650 			score += sum;
8651 			sum = 0;
8652 		}
8653 		else
8654             if (sum < X)
8655 				break;
8656 		if ((sum += matrix[*--q][READDB_UNPACK_BASE_3(ch)]) > 0) {
8657 			q_beg = q;
8658 			score += sum;
8659 			sum = 0;
8660 		}
8661 		else
8662             if (sum < X)
8663 				break;
8664 		if ((sum += matrix[*--q][READDB_UNPACK_BASE_2(ch)]) > 0) {
8665 			q_beg = q;
8666 			score += sum;
8667 			sum = 0;
8668 		}
8669 		else
8670             if (sum < X)
8671 				break;
8672 		if ((sum += matrix[*--q][READDB_UNPACK_BASE_1(ch)]) > 0) {
8673 			q_beg = q;
8674 			score += sum;
8675 			sum = 0;
8676 		}
8677 		else
8678             if (sum < X)
8679 				break;
8680 	} while (s > start);
8681 
8682 	/* There is still another partial byte to be extended through. */
8683     if (sum >= X && start != (Uint1Ptr) search->subject->sequence)
8684 	{
8685 		s--;
8686 		ch = *s;
8687 		while (q > query0)
8688 		{
8689 			if ((sum += matrix[*--q][READDB_UNPACK_BASE_4(ch)]) > 0)
8690 			{
8691 				q_beg = q;
8692 				score += sum;
8693 				sum = 0;
8694 			}
8695             else if (sum < X)
8696 			{
8697 				break;
8698 			}
8699 			ch >>= 2;
8700 		}
8701 	}
8702 
8703 	/* extend to the right */
8704 	q = q_end;
8705 	s = s_end;
8706 	sum = 0;
8707 	while (s < sf)
8708 	{
8709 		ch = *s;
8710 		if ((sum += matrix[*q++][READDB_UNPACK_BASE_1(ch)]) > 0)
8711 		{
8712 			q_end = q;
8713 			score += sum;
8714 			sum = 0;
8715 		}
8716 		else if (sum < X)
8717 		{
8718 				break;
8719 		}
8720 
8721 		if ((sum += matrix[*q++][READDB_UNPACK_BASE_2(ch)]) > 0)
8722 		{
8723 			q_end = q;
8724 			score += sum;
8725 			sum = 0;
8726 		}
8727 		else if (sum < X)
8728 		{
8729 				break;
8730 		}
8731 
8732 		if ((sum += matrix[*q++][READDB_UNPACK_BASE_3(ch)]) > 0)
8733 		{
8734 			q_end = q;
8735 			score += sum;
8736 			sum = 0;
8737 		}
8738 		else if (sum < X)
8739 		{
8740 				break;
8741 		}
8742 
8743 		if ((sum += matrix[*q++][READDB_UNPACK_BASE_4(ch)]) > 0)
8744 		{
8745 			q_end = q;
8746 			score += sum;
8747 			sum = 0;
8748 		}
8749 		else if (sum < X)
8750 		{
8751 				break;
8752 		}
8753 		s++;
8754 	}
8755 
8756 	/* extend into the final, partially packed byte (if one exists) */
8757 /* If the query ends before the subject, then don't extend any more as the query
8758 has no remainder. */
8759 	if (remainder > 0 && sum >= X)
8760 	{
8761 		ch = *sf;
8762 
8763 		while (remainder > 0)
8764 		{
8765 			if ((sum += matrix[*q++][READDB_UNPACK_BASE_1(ch)]) > 0)
8766 			{
8767 				q_end = q;
8768 				score += sum;
8769 				sum = 0;
8770 			}
8771 			else if (sum < X)
8772 			{
8773 					break;
8774 			}
8775 #ifdef OLD_BYTE_ORDER
8776 			ch >>= 2;
8777 #else
8778 			ch <<= 2;
8779 #endif
8780 			remainder--;
8781 		}
8782 	} /* End ungapped alignment */
8783 
8784 	/* Record how far this diagonal has been traversed */
8785 	/* 	ewp->combo_array[real_diag].diag_level = q_end - query0 + search->ewp_params->offset; */
8786 	ewp->combo_array[real_diag].diag_level = (q_end - query0 - q_off) + s_off*READDB_COMPRESSION_RATIO + search->ewp_params->offset;
8787 
8788 
8789 
8790     if (score >= pbp->cutoff_s2) /* Score is reportable */
8791     {
8792 #ifdef BLAST_COLLECT_STATS
8793         search->second_pass_good_extends++;
8794 #endif
8795         if(search->pbp->gapped_calculation)
8796             BlastNtSaveCurrentHsp(search, score, (q_beg-query0),
8797                 (q_beg-query0+READDB_COMPRESSION_RATIO*s_off-q_off),
8798                 (q_end-q_beg), context,
8799                 q_off - 5, READDB_COMPRESSION_RATIO*s_off - 5);
8800         else
8801             BlastSaveCurrentHsp(search, score,
8802                 (q_beg-query0),
8803                 (q_beg-query0+READDB_COMPRESSION_RATIO*s_off-q_off),
8804                 (q_end-q_beg), context);
8805     }
8806 
8807 	return 0;
8808 }
8809 
/*
	search_nt_orig -- an adaptation of the original search_nt() function
	of BLASTN

	* Can this ever be called?
	*  - It is only called for blastn, only from BlastWordFinder_mh().
	*  - BlastWordFinder_mh() is only called if BlastExtendWordSearch() is called w/ multiple_hits==TRUE
	*  - BlastExtendWordSearch() is called in 2 places:
	*       BLASTPerform2PassSearch (called w/ multiple_hits=TRUE),
	*         which is called from BLASTPerformSearch if search->pbp->two_pass_method
	*       BLASTPerformFinalSearch (called w/ search->pbp->multiple_hits_only)
	*  * So multiple_hits_only or two_pass_method must be set for this to be called.
	*  For blastn, these are set to false in blastool, blastutl.
	*  These can be set to TRUE in blastpgp.c, but can blastn also be true in this case?
	*
	*  I have updated the array accesses to use the new mod_lt[], as the other WordFinder routines
	*  now do, but I have not been able to test this change.
	* -cfj
*/
/*
	BlastNtWordFinder_mh -- multiple-hit word finder for a nucleotide subject.

	Walks the subject bytes (search->subject->sequence) while maintaining a
	rolling lookup-table index.  For every query offset stored under the
	current index, the per-diagonal state in ewp->combo_array is updated:
	  - if the previous hit recorded on the (masked) diagonal is at least
	    "window" away, the current position is only remembered as last_hit;
	  - otherwise, if it is at least virtual_wordsize away, BlastNtWordExtend()
	    is called (provided diag_level does not exceed the current subject
	    position) and last_hit is set according to whether an HSP was saved;
	  - otherwise nothing is done for that hit.

	The scan loop exists twice: the first copy filters candidate indices
	through lookup->pv_array, the second reads mod_lt[].num_used directly.

	Returns search->current_hitlist->hspcnt on normal completion, or 3 when
	BlastNtWordExtend() returns non-zero.  BlastExtendWordExit() is called on
	both exit paths.
*/
static Int4
BlastNtWordFinder_mh(BlastSearchBlkPtr search, LookupTablePtr lookup)
{
	register Uint1Ptr s, s_end;
	Uint1Ptr subject0;
	BLAST_Diag	diag, diag_tmp, real_diag;
	BLAST_ExtendWordPtr     ewp;
	BLAST_ExtendWordParamsPtr     ewp_params;
	BLAST_WordFinderPtr	wfp;
	CfjModStruct *combo_array;
        register Int4  diff, window, lookup_index, mask;
        Int4  char_size, index=0, current_count;
	register ModLookupPosition hit_info;
        /* NOTE(review): compression_factor (and bits_to_shift below) are assigned
           in this function but never read afterwards. */
        Int4            s_pos, q_off, s_off, offset, virtual_wordsize, wordsize, compressed_wordsize, compression_factor;
        register Int4 bits_to_shift, min_diag_length, min_diag_mask;
	register Int4 num_hits;
	register Int4 next_nhits;
        Int4 * next_nhits_addr;
	register ModLookupPositionPtr lookup_pos;
	int next_lindex;
    	PV_ARRAY_TYPE *pv_array = lookup->pv_array;
    	register PV_ARRAY_TYPE PNTR next_pv_array_addr;
    	register PV_ARRAY_TYPE next_pv_val,pv_val;
#if 1
	ModLAEntry *mod_lt=lookup->mod_lt;
#endif

	ewp_params=search->ewp_params;

	/* Cache the scan parameters in locals. */
	wfp = search->wfp_second;
        char_size = lookup->char_size;
        mask = lookup->mask;
        offset = ewp_params->offset;
        window = ewp_params->window;
        subject0 = s = (Uint1Ptr) search->subject->sequence;
        min_diag_length = ewp_params->min_diag_length;
        bits_to_shift = ewp_params->bits_to_shift;
        min_diag_mask = ewp_params->min_diag_mask;

        /* Make sure there is a hitlist to collect into; reuse the existing one,
           purging it only when current_hitlist_purge is set. */
        if (search->current_hitlist == NULL)
        {
                search->current_hitlist = BlastHitListNew(search);
        }
        else
        { /* Scrub the hitlist. */
                if (search->current_hitlist_purge)
                        BlastHitListPurge(search->current_hitlist);
        }

	compressed_wordsize = lookup->wordsize;
	wordsize = wfp->wordsize;

/* The subject sequence is too short, exit this function now. */
	if (wordsize > search->subject->length)
		goto NormalReturn;

	/* lookup_find_init() supplies the starting scan pointer and the initial
	   lookup index (through &index). */
	s = lookup_find_init(lookup, &index, s);
        lookup_index = index;
/* Determines when to stop scanning the database; does not include remainder. */
        s_end = subject0 + (search->subject->length)/READDB_COMPRESSION_RATIO;
	compression_factor = wfp->compression_ratio*compressed_wordsize;
	virtual_wordsize = READDB_COMPRESSION_RATIO*compressed_wordsize;

	/* context-dependent values */
	ewp=search->context[search->first_context].ewp;
	combo_array=ewp->combo_array;

	if (pv_array)
	{
	   /* Version that pre-filters indices through the pv_array bit vector.
	      The index and the pv_array word for the NEXT subject byte are
	      computed one step ahead of the index currently being tested. */
           next_lindex = (((lookup_index) & mask)<<char_size) + *(s+1);
	   next_pv_val = pv_array[next_lindex>>PV_ARRAY_BTS];

           for (;;) {
              /* Advance until the pv_array bit for lookup_index is set;
                 the only way out of the scan is reaching s_end. */
              do {
                        /* lookup a contiguous word. */
                        lookup_index = next_lindex;
                        s++;

                        if (s == s_end) goto NormalReturn;

                        next_lindex = (((lookup_index) & mask)<<char_size) + *(s+1);
			next_pv_array_addr = &pv_array[next_lindex>>PV_ARRAY_BTS];
                    	pv_val = next_pv_val;
                    	next_pv_val = *next_pv_array_addr;
                } while ((pv_val&(((PV_ARRAY_TYPE) 1)<<(lookup_index&PV_ARRAY_MASK))) == 0);


		num_hits = mod_lt[lookup_index].num_used;
		lookup_pos = mod_lt[lookup_index].entries;
		hit_info = *((Uint4 *) lookup_pos);
		lookup_pos++;

		/* With more than 3 hits, the slot after the first entry is
		   reinterpreted as a pointer to the remaining positions. */
		if(num_hits>3){
		  lookup_pos=*((ModLookupPositionPtr PNTR) lookup_pos);
		}

		s_off = s-subject0+1;
		diag_tmp = s_off*READDB_COMPRESSION_RATIO + min_diag_length;
		s_pos = (s-subject0)*READDB_COMPRESSION_RATIO+offset;
		/* Extend each hit in the linked list */
		do {
		        q_off = hit_info;
			num_hits--;
			/* NOTE(review): this pre-load also happens on the final
			   iteration, i.e. one position past the last hit is read
			   (and then ignored). */
			hit_info = *((Uint4 *) lookup_pos); /* load next hit_info */
			lookup_pos++;

		    	diag = diag_tmp - q_off;
		    	real_diag = diag & min_diag_mask;

			/* Distance from the last hit remembered on this diagonal. */
			diff = s_pos - combo_array[real_diag].last_hit;

			if (diff >= window)
			{
				/* Previous hit too far back: just remember this one. */
				combo_array[real_diag].last_hit = s_pos;
			}
			else if (diff >= virtual_wordsize)
			{
#ifdef BLAST_COLLECT_STATS
				search->second_pass_hits++;
#endif
        			current_count = search->current_hitlist->hspcnt;
				/* Extend only if diag_level does not exceed this subject position. */
				if (combo_array[real_diag].diag_level <= (s_off*READDB_COMPRESSION_RATIO+offset))
				{
					if (BlastNtWordExtend(search, q_off, s_off, real_diag, search->first_context) != 0)
						goto ErrorReturn;
				}
				/* If no HSP's saved, save last hit. */
				if (current_count == search->current_hitlist->hspcnt)
					combo_array[real_diag].last_hit = s_pos;
				else
					combo_array[real_diag].last_hit = 0;
			}

		} while (num_hits>0);
	   }
	}
	else
	{
	   /* Version without pv_array: the hit count of the NEXT index is
	      fetched one step ahead and becomes num_hits on the following pass. */
           next_lindex = (((lookup_index) & mask)<<char_size) + *(s+1);
           next_nhits_addr=&mod_lt[next_lindex].num_used ;
           next_nhits=*next_nhits_addr;

           for (;;) {
              /* Advance until an index with at least one hit is found. */
              do {
                        /* lookup a contiguous word. */
                        lookup_index = next_lindex;
                        s++;

                        if (s == s_end) goto NormalReturn;

                        next_lindex = (((lookup_index) & mask)<<char_size) + *(s+1);

                        next_nhits_addr = &mod_lt[next_lindex].num_used;

                        num_hits = next_nhits;
                        next_nhits=*next_nhits_addr;
                } while (num_hits == 0);

		lookup_pos = mod_lt[lookup_index].entries;
		hit_info = *((Uint4 *) lookup_pos);
		lookup_pos++;

		/* Same entry handling as in the pv_array version above. */
		if(num_hits>3){
		  lookup_pos=*((ModLookupPositionPtr PNTR) lookup_pos);
		}

		s_off = s-subject0+1;
		diag_tmp = s_off*READDB_COMPRESSION_RATIO + min_diag_length;
		s_pos = (s-subject0)*READDB_COMPRESSION_RATIO+offset;
		/* Extend each hit in the linked list */
		do {
		        q_off = hit_info;
			num_hits--;
			hit_info = *((Uint4 *) lookup_pos); /* load next hit_info */
			lookup_pos++;

		    	diag = diag_tmp - q_off;
		    	real_diag = diag & min_diag_mask;

			/* Distance from the last hit remembered on this diagonal. */
			diff = s_pos - combo_array[real_diag].last_hit;

			if (diff >= window)
			{
				/* Previous hit too far back: just remember this one. */
				combo_array[real_diag].last_hit = s_pos;
			}
			else if (diff >= virtual_wordsize)
			{
#ifdef BLAST_COLLECT_STATS
				search->second_pass_hits++;
#endif
        			current_count = search->current_hitlist->hspcnt;
				/* Extend only if diag_level does not exceed this subject position. */
				if (combo_array[real_diag].diag_level <= (s_off*READDB_COMPRESSION_RATIO+offset))
				{
					if (BlastNtWordExtend(search, q_off, s_off, real_diag, search->first_context) != 0)
						goto ErrorReturn;
				}
				/* If no HSP's saved, save last hit. */
				if (current_count == search->current_hitlist->hspcnt)
					combo_array[real_diag].last_hit = s_pos;
				else
					combo_array[real_diag].last_hit = 0;
			}

		} while (num_hits>0);
	   }
	}

NormalReturn:
	BlastExtendWordExit(search);
        return search->current_hitlist->hspcnt;

ErrorReturn:
	/* Reached only when BlastNtWordExtend() returned non-zero. */
	BlastExtendWordExit(search);
	return 3;
}
/*
	BlastNtWordFinder (search_nt_orig) -- an adaptation of the original
	search_nt() function of BLASTN.

	Walks the packed subject bytes (search->subject->sequence) while
	maintaining a rolling lookup-table index.  For every query offset stored
	under the current index it:
	  1. when lookup->wordsize exceeds lookup->reduced_wordsize, requires the
	     extra subject bytes to the right to equal the query bases packed
	     four to a byte;
	  2. counts up to four matching bases immediately to the left ("left")
	     and to the right ("right") of the matched word, unpacking one
	     subject byte with READDB_UNPACK_BASE_1..4;
	  3. if left + right >= virtual_wordsize and diag_level for the (masked)
	     diagonal is below the current subject position, calls
	     BlastNtWordExtend().

	The scan loop is written out four times: with/without extra bytes, each
	with a pv_array-filtered and a direct ("dense") variant.

	Returns search->current_hitlist->hspcnt on normal completion, or 3 when
	BlastNtWordExtend() returns non-zero.  BlastExtendWordExit() is called on
	both exit paths.
*/
static Int4
BlastNtWordFinder(BlastSearchBlkPtr search, LookupTablePtr lookup)
{
	BLASTContextStructPtr search_context;
	register Uint1Ptr s, s_end;
	Uint1Ptr q, q_end, subject0, query0;
	Uint1		p, packed_query, p_start;
	BLAST_Diag	diag, diag_tmp, real_diag;
	BLAST_ExtendWordPtr     ewp;
	BLAST_ExtendWordParamsPtr     ewp_params;
	BLAST_WordFinderPtr	wfp;
	CfjModStruct *combo_array;
        Int4  lookup_index, mask;
        Int4  char_size, index=0, query_length=0;

	register ModLookupPosition hit_info;

	register PV_ARRAY_TYPE PNTR next_pv_array_addr;
	register PV_ARRAY_TYPE next_pv_val,pv_val;
	register ModLookupPositionPtr lookup_pos;
	register Int4 num_hits;
	register Int4 next_nhits;
	Int4 * next_nhits_addr;


        Int2            left, right;
        Int4            q_off, s_off, offset, virtual_wordsize, wordsize, compressed_wordsize, compression_factor;
	Int4	extra_bytes, extra_bytes_needed, my_index;
        /* NOTE(review): bits_to_shift is assigned below but never read in this function. */
        Int4 bits_to_shift, min_diag_length, min_diag_mask;
	int next_lindex;
	PV_ARRAY_TYPE *pv_array = lookup->pv_array;
	ModLAEntry *mod_lt=lookup->mod_lt;

	query0 = NULL;	/* Gets rid of a warning. */
	p = 255;	/* Gets rid of a warning. */
	ewp_params=search->ewp_params;

	/* Cache the scan parameters in locals. */
	wfp = search->wfp_second;
        char_size = lookup->char_size;
        mask = lookup->mask;
        offset = ewp_params->offset;
        subject0 = s = (Uint1Ptr) search->subject->sequence;
        min_diag_length = ewp_params->min_diag_length;
        bits_to_shift = ewp_params->bits_to_shift;
        min_diag_mask = ewp_params->min_diag_mask;


        /* Make sure there is a hitlist to collect into; reuse the existing one,
           purging it only when current_hitlist_purge is set. */
        if (search->current_hitlist == NULL)
        {
                search->current_hitlist = BlastHitListNew(search);
        }
        else
        { /* Scrub the hitlist. */
                if (search->current_hitlist_purge)
                        BlastHitListPurge(search->current_hitlist);
        }

	compressed_wordsize = lookup->reduced_wordsize;
	wordsize = wfp->wordsize;
	/* Whole bytes of the lookup word that are not covered by the table index
	   and therefore have to be compared explicitly for each hit. */
	extra_bytes = lookup->wordsize - compressed_wordsize;

/* The subject sequence is too short, exit this function now. */
	if (wordsize > search->subject->length)
		goto NormalReturn;

	/* lookup_find_init() supplies the starting scan pointer and the initial
	   lookup index (through &index). */
	s = lookup_find_init(lookup, &index, s);
        lookup_index = index;
/* Determines when to stop scanning the database; does not include remainder. */
        s_end = subject0 + (search->subject->length)/READDB_COMPRESSION_RATIO;
	compression_factor = wfp->compression_ratio*compressed_wordsize;
	/* Bases still required beyond the whole-byte lookup word; compared
	   against left + right further down. */
	virtual_wordsize = wordsize - READDB_COMPRESSION_RATIO*lookup->wordsize;
	search_context = search->context;
	query_length = search_context[search->first_context].query->length;
        extra_bytes_needed = extra_bytes;
	query0 = search_context[search->first_context].query->sequence;
	q_end = query0 + query_length;
	ewp = search_context[search->first_context].ewp;
	combo_array = ewp->combo_array;

        if (extra_bytes_needed) {

	/** The first for() loop is optimized for sparse tables (which rarely hit), the second for dense */
	if(pv_array){
	  /* We use the pv_array here, since (on short-med queries) most lookups fail */

	  /* The index and the pv_array word for the NEXT subject byte are
	     computed one step ahead of the index currently being tested. */
	  next_lindex = (((lookup_index) & mask)<<char_size) + *(s+1);
	  next_pv_val = pv_array[next_lindex>>PV_ARRAY_BTS];

	  for (;;) {
	     do {
		/* lookup a contiguous word. */
		s++;
		lookup_index = next_lindex;

		if (s == s_end)
		   goto NormalReturn;

		next_lindex = (((lookup_index) & mask)<<char_size) + *(s+1);
		next_pv_array_addr = &pv_array[next_lindex>>PV_ARRAY_BTS];
		pv_val = next_pv_val;
		next_pv_val = *next_pv_array_addr;

	     }while ((pv_val&(((PV_ARRAY_TYPE) 1)<<(lookup_index&PV_ARRAY_MASK))) == 0);

		num_hits = mod_lt[lookup_index].num_used;
		s_off = s-subject0+1;
		diag_tmp = s_off*READDB_COMPRESSION_RATIO + min_diag_length;
		lookup_pos = mod_lt[lookup_index].entries;
		hit_info = *((Uint4 *) lookup_pos);
		lookup_pos++;

		/* With more than 3 hits, the slot after the first entry is
		   reinterpreted as a pointer to the remaining positions. */
		if(num_hits>3){
		  lookup_pos=*((ModLookupPositionPtr PNTR) lookup_pos);
		}

		/* First subject byte after the matched word; shared by all hits. */
		p_start = *((Uint1Ptr) search->subject->sequence + s_off);
		/* Extend each hit in the linked list */
		do {
		        q_off = hit_info;
			num_hits--;
			/* NOTE(review): this pre-load also happens on the final
			   iteration, i.e. one position past the last hit is read
			   (and then ignored). */
			hit_info = *((Uint4 *) lookup_pos); /* load next hit_info */
			lookup_pos++;

		    	diag = diag_tmp - q_off;

			/* Check for extra bytes if required for longer words. */
				/* extend to the right */
				p = p_start;
				q = query0 + q_off;
				my_index=0;
				while (extra_bytes_needed)
				{
				/* Note: no check is done that q[0-3] is not an ambiguity code.  Could be done, but might slow things down. */
				   packed_query = (q[0]<<6) + (q[1]<<4) + (q[2]<<2) + q[3];
					if (p != packed_query)
						break;
					q += 4;
					extra_bytes_needed--;
					my_index++;
					p = *((Uint1Ptr) search->subject->sequence + s_off + my_index);
				}
				if (extra_bytes_needed)
				{ /* extra_bytes_needed next round. */
					extra_bytes_needed = extra_bytes;
					/* "continue" jumps to the while (num_hits>0) test below. */
					continue; /* not enough bytes found. */
				}
				/* Restore the counter; it doubles as the byte offset of
				   the right-hand extension below. */
				extra_bytes_needed = extra_bytes;


			q = query0 + q_off - compression_factor;
			/* p is refreshed only when a subject byte exists to the left. */
			if (s_off > compressed_wordsize)
				p = *(subject0 + s_off - compressed_wordsize - 1);

			/* extend to the left */
			/* NOTE(review): in each test the query byte *--q is read
			   before the q < query0 bound is checked. */
			if (s_off == compressed_wordsize || READDB_UNPACK_BASE_4(p) != *--q || q < query0)
			{
				left = 0;
			}
			else
			{
				if (READDB_UNPACK_BASE_3(p) != *--q || q < query0)
				{
					left = 1;
				}
				else
				{
					if (READDB_UNPACK_BASE_2(p) != *--q || q < query0)
					{
						left = 2;
					}
					else
					{
						if (READDB_UNPACK_BASE_1(p) != *--q || q < query0)
						{
							left = 3;
						}
						else
						{
							left = 4;
						}
					}
				}
			}
			/* extend to the right */
			/* Starts after the extra bytes that were verified above. */
			p = *(subject0 + s_off + extra_bytes_needed);
			q = query0 + q_off + 4*extra_bytes_needed;
			if (s+extra_bytes_needed >= s_end || READDB_UNPACK_BASE_1(p) != *q++ || q >= q_end)
			{
				right = 0;
			}
			else
			{
				if (READDB_UNPACK_BASE_2(p) != *q++ || q >= q_end)
				{
					right = 1;
				}
				else
				{
					if (READDB_UNPACK_BASE_3(p) != *q++ || q >= q_end)
					{
						right = 2;
					}
					else
					{
						if (READDB_UNPACK_BASE_4(p) != *q++ || q >= q_end)
						{
							right = 3;
						}
						else
						{
							right = 4;
						}
					}
				}
			}
			if (left + right >= virtual_wordsize)
			{
				/* Check if this diagonal has already been explored. */
		    		real_diag = diag & min_diag_mask;
				if (combo_array[real_diag].diag_level >= (s_off*READDB_COMPRESSION_RATIO+offset))
		    		{
					continue;
		    		}
#ifdef BLAST_COLLECT_STATS
				search->second_pass_hits++;
#endif
				if (BlastNtWordExtend(search, q_off, s_off, real_diag, search->first_context) != 0)
					goto ErrorReturn;
			}
		} while (num_hits>0);
	  } /* end for(;;) */
	}else{
	  /* Dense version - doesn't use pv_array */
	    /* The hit count of the NEXT index is fetched one step ahead and
	       becomes num_hits on the following pass. */
	    next_lindex = (((lookup_index) & mask)<<char_size) + *(s+1);
	    next_nhits_addr=&mod_lt[next_lindex].num_used ;
	    next_nhits=*next_nhits_addr;

	    for (;;) {
	        do {
			/* lookup a contiguous word. */
			lookup_index = next_lindex;
			s++;

			if (s == s_end) goto NormalReturn;

        		next_lindex = (((lookup_index) & mask)<<char_size) + *(s+1);

			next_nhits_addr = &mod_lt[next_lindex].num_used;

			num_hits = next_nhits;
			next_nhits=*next_nhits_addr;
		} while (num_hits == 0);

		lookup_pos = mod_lt[lookup_index].entries;

		s_off = s-subject0+1;
		diag_tmp = s_off*READDB_COMPRESSION_RATIO + min_diag_length;
		hit_info = *((Uint4 *) lookup_pos);
		lookup_pos++;


		/* Same entry handling as in the pv_array version above. */
		if(num_hits>3){
		  lookup_pos=*((ModLookupPositionPtr PNTR) lookup_pos);
		}

		/* First subject byte after the matched word; shared by all hits. */
		p_start = *((Uint1Ptr) search->subject->sequence + s_off);
		/* Extend each hit in the linked list */
		do {
		        q_off = hit_info;
			num_hits--;
			hit_info = *((Uint4 *) lookup_pos); /* load next hit_info */
			lookup_pos++;

		    	diag = diag_tmp - q_off;


			/* Check for extra bytes if required for longer words. */
				/* extend to the right */
				p = p_start;
				q = query0 + q_off;
				my_index=0;
				while (extra_bytes_needed)
				{
				/* Note: no check is done that q[0-3] is not an ambiguity code.  Could be done, but might slow things down. */
				   packed_query = (q[0]<<6) + (q[1]<<4) + (q[2]<<2) + q[3];
					if (p != packed_query)
						break;
					q += 4;
					extra_bytes_needed--;
					my_index++;
					p = *((Uint1Ptr) search->subject->sequence + s_off + my_index);
				}
				if (extra_bytes_needed)
				{ /* extra_bytes_needed next round. */
					extra_bytes_needed = extra_bytes;
					continue; /* not enough bytes found. */
				}
				/* Restore the counter; it doubles as the byte offset of
				   the right-hand extension below. */
				extra_bytes_needed = extra_bytes;

			q = query0 + q_off - compression_factor;
			/* p is refreshed only when a subject byte exists to the left. */
			if (s_off > compressed_wordsize)
				p = *(subject0 + s_off - compressed_wordsize - 1);

			/* extend to the left */
			if (s_off == compressed_wordsize || READDB_UNPACK_BASE_4(p) != *--q || q < query0)
			{
				left = 0;
			}
			else
			{
				if (READDB_UNPACK_BASE_3(p) != *--q || q < query0)
				{
					left = 1;
				}
				else
				{
					if (READDB_UNPACK_BASE_2(p) != *--q || q < query0)
					{
						left = 2;
					}
					else
					{
						if (READDB_UNPACK_BASE_1(p) != *--q || q < query0)
						{
							left = 3;
						}
						else
						{
							left = 4;
						}
					}
				}
			}
			/* extend to the right */
			/* Starts after the extra bytes that were verified above. */
			p = *(subject0 + s_off + extra_bytes_needed);
			q = query0 + q_off + 4*extra_bytes_needed;
			if (s+extra_bytes_needed >= s_end || READDB_UNPACK_BASE_1(p) != *q++ || q >= q_end)
			{
				right = 0;
			}
			else
			{
				if (READDB_UNPACK_BASE_2(p) != *q++ || q >= q_end)
				{
					right = 1;
				}
				else
				{
					if (READDB_UNPACK_BASE_3(p) != *q++ || q >= q_end)
					{
						right = 2;
					}
					else
					{
						if (READDB_UNPACK_BASE_4(p) != *q++ || q >= q_end)
						{
							right = 3;
						}
						else
						{
							right = 4;
						}
					}
				}
			}
			if (left + right >= virtual_wordsize)
			{
				/* Check if this diagonal has already been explored. */
		    		real_diag = diag & min_diag_mask;
				if (combo_array[real_diag].diag_level >= (s_off*READDB_COMPRESSION_RATIO+offset))
		    		{
					continue;
		    		}
#ifdef BLAST_COLLECT_STATS
				search->second_pass_hits++;
#endif
				if (BlastNtWordExtend(search, q_off, s_off, real_diag, search->first_context) != 0)
					goto ErrorReturn;
			}
		} while (num_hits>0);
	    }
         }
	}
	else /* extra_bytes not needed. */
	{
	/** The first for() loop is optimized for sparse tables (which rarely hit), the second for dense */
	if(pv_array){
	  /* We use the pv_array here, since (on short-med queries) most lookups fail */
	   next_lindex = (((lookup_index) & mask)<<char_size) + *(s+1);
	   next_pv_val = pv_array[next_lindex>>PV_ARRAY_BTS];

	   for (;;) {
	      do {
		 /* lookup a contiguous word. */
		 s++;
		 lookup_index = next_lindex;

		 if (s == s_end)
		    goto NormalReturn;

		 next_lindex = (((lookup_index) & mask)<<char_size) + *(s+1);
		 next_pv_array_addr = &pv_array[next_lindex>>PV_ARRAY_BTS];
		 pv_val = next_pv_val;
		 next_pv_val = *next_pv_array_addr;

	      } while ((pv_val&(((PV_ARRAY_TYPE) 1)<<(lookup_index&PV_ARRAY_MASK))) == 0);

		num_hits = mod_lt[lookup_index].num_used;
		s_off = s-subject0+1;
		diag_tmp = s_off*READDB_COMPRESSION_RATIO + min_diag_length;
		lookup_pos = mod_lt[lookup_index].entries;
		hit_info = *((Uint4 *) lookup_pos);
		lookup_pos++;

		/* Same entry handling as in the extra-bytes versions above. */
		if(num_hits>3){
		  lookup_pos=*((ModLookupPositionPtr PNTR) lookup_pos);
		}

		/* Extend each hit in the linked list */
		do {
		        q_off = hit_info;
			num_hits--;
			hit_info = *((Uint4 *) lookup_pos); /* load next hit_info */
			lookup_pos++;


			q = query0 + q_off - compression_factor;

			/* p is refreshed only when a subject byte exists to the left. */
			if (s_off > compressed_wordsize)
				p = *(subject0 + s_off - compressed_wordsize - 1);

		    	diag = diag_tmp - q_off;

			/* extend to the left */
			if (s_off == compressed_wordsize || READDB_UNPACK_BASE_4(p) != *--q || q < query0)
			{
				left = 0;
			}
			else
			{
				if (READDB_UNPACK_BASE_3(p) != *--q || q < query0)
				{
					left = 1;
				}
				else
				{
					if (READDB_UNPACK_BASE_2(p) != *--q || q < query0)
					{
						left = 2;
					}
					else
					{
						if (READDB_UNPACK_BASE_1(p) != *--q || q < query0)
						{
							left = 3;
						}
						else
						{
							left = 4;
						}
					}
				}
			}
			/* extend to the right */
			p = *(subject0 + s_off);
			q = query0 + q_off;
			if (s >= s_end || READDB_UNPACK_BASE_1(p) != *q++ || q >= q_end)
			{
				right = 0;
			}
			else
			{
				if (READDB_UNPACK_BASE_2(p) != *q++ || q >= q_end)
				{
					right = 1;
				}
				else
				{
					if (READDB_UNPACK_BASE_3(p) != *q++ || q >= q_end)
					{
						right = 2;
					}
					else
					{
						if (READDB_UNPACK_BASE_4(p) != *q++ || q >= q_end)
						{
							right = 3;
						}
						else
						{
							right = 4;
						}
					}
				}
			}
			if (left + right >= virtual_wordsize)
			{
				/* Check if this diagonal has already been explored. */
		    		real_diag = diag & min_diag_mask;
				if (combo_array[real_diag].diag_level >= (s_off*READDB_COMPRESSION_RATIO+offset))
		    		{
					continue;
		    		}
#ifdef BLAST_COLLECT_STATS
				search->second_pass_hits++;
#endif
				if (BlastNtWordExtend(search, q_off, s_off, real_diag, search->first_context) != 0)
					goto ErrorReturn;
			}
		} while (num_hits>0);
	  } /* end for(;;) */

	}else{
	  /* Dense version - doesn't use pv_array */
	    next_lindex = (((lookup_index) & mask)<<char_size) + *(s+1);
	    next_nhits_addr=&mod_lt[next_lindex].num_used ;
	    next_nhits=*next_nhits_addr;

	    for (;;) {
	        do {
			/* lookup a contiguous word. */
			lookup_index = next_lindex;
			s++;

			if (s == s_end) goto NormalReturn;

        		next_lindex = (((lookup_index) & mask)<<char_size) + *(s+1);

			next_nhits_addr = &mod_lt[next_lindex].num_used;

			num_hits = next_nhits;
			next_nhits=*next_nhits_addr;
		} while (num_hits == 0);

		lookup_pos = mod_lt[lookup_index].entries;

		s_off = s-subject0+1;
		diag_tmp = s_off*READDB_COMPRESSION_RATIO + min_diag_length;
		hit_info = *((Uint4 *) lookup_pos);
		lookup_pos++;


		/* Same entry handling as in the versions above. */
		if(num_hits>3){
		  lookup_pos=*((ModLookupPositionPtr PNTR) lookup_pos);
		}

		/* Extend each hit in the linked list */
		do {
		        q_off = hit_info;
			num_hits--;
			hit_info = *((Uint4 *) lookup_pos); /* load next hit_info */
			lookup_pos++;


			q = query0 + q_off - compression_factor;

			/* p is refreshed only when a subject byte exists to the left. */
			if (s_off > compressed_wordsize)
				p = *(subject0 + s_off - compressed_wordsize - 1);

		    	diag = diag_tmp - q_off;

			/* extend to the left */
			if (s_off == compressed_wordsize || READDB_UNPACK_BASE_4(p) != *--q || q < query0)
			{
				left = 0;
			}
			else
			{
				if (READDB_UNPACK_BASE_3(p) != *--q || q < query0)
				{
					left = 1;
				}
				else
				{
					if (READDB_UNPACK_BASE_2(p) != *--q || q < query0)
					{
						left = 2;
					}
					else
					{
						if (READDB_UNPACK_BASE_1(p) != *--q || q < query0)
						{
							left = 3;
						}
						else
						{
							left = 4;
						}
					}
				}
			}
			/* extend to the right */
			p = *(subject0 + s_off);
			q = query0 + q_off;
			if (s >= s_end || READDB_UNPACK_BASE_1(p) != *q++ || q >= q_end)
			{
				right = 0;
			}
			else
			{
				if (READDB_UNPACK_BASE_2(p) != *q++ || q >= q_end)
				{
					right = 1;
				}
				else
				{
					if (READDB_UNPACK_BASE_3(p) != *q++ || q >= q_end)
					{
						right = 2;
					}
					else
					{
						if (READDB_UNPACK_BASE_4(p) != *q++ || q >= q_end)
						{
							right = 3;
						}
						else
						{
							right = 4;
						}
					}
				}
			}
			if (left + right >= virtual_wordsize)
			{
				/* Check if this diagonal has already been explored. */
		    		real_diag = diag & min_diag_mask;
				if (combo_array[real_diag].diag_level >= (s_off*READDB_COMPRESSION_RATIO+offset))
		    		{
					continue;
		    		}
#ifdef BLAST_COLLECT_STATS
				search->second_pass_hits++;
#endif
				if (BlastNtWordExtend(search, q_off, s_off, real_diag, search->first_context) != 0)
					goto ErrorReturn;
			}
		} while (num_hits>0);
	    }
	  }
	}

NormalReturn:
	BlastExtendWordExit(search);
        return search->current_hitlist->hspcnt;

ErrorReturn:
	/* Reached only when BlastNtWordExtend() returned non-zero. */
	BlastExtendWordExit(search);
	return 3;
}
9698 
9699 static Int4
BlastPurgeResultList(BLASTResultHitlistPtr PNTR results,Int4 hitlist_count)9700 BlastPurgeResultList(BLASTResultHitlistPtr PNTR results, Int4 hitlist_count)
9701 {
9702 	Int4 index, index_new;
9703 
9704 	for (index=0; index<hitlist_count; index++)
9705 	{
9706 	   if (results[index]->num_ref <= 0) {
9707 	      if (results[index]->seqalign)
9708 		 SeqAlignSetFree(results[index]->seqalign);
9709 	      results[index] = BLASTResultHitlistFree(results[index]);
9710 	   }
9711 	}
9712 
9713 	index_new=0;
9714 	for (index=0; index < hitlist_count; index++)
9715 	{
9716 		if (results[index] != NULL)
9717 		{
9718 			results[index_new] = results[index];
9719 			index_new++;
9720 		}
9721 	}
9722   	for (index=index_new; index<hitlist_count; index++)
9723     		results[index] = NULL;
9724 
9725 	return index_new;
9726 }
9727 
9728 /* CC: Changed to have the same tie-breakers as score_compare_hsps */
BLASTResultHspScoreCmp(VoidPtr v1,VoidPtr v2)9729 int LIBCALLBACK BLASTResultHspScoreCmp(VoidPtr v1, VoidPtr v2)
9730 {
9731     BLASTResultHspPtr hsp1 = (BLASTResultHspPtr) v1;
9732     BLASTResultHspPtr hsp2 = (BLASTResultHspPtr) v2;
9733     int result = 0;             /* the result of the comparison */
9734     int query_end1, query_end2;
9735     int subject_end1, subject_end2;
9736 
9737     /* Null HSPs are "greater" than any non-null ones, so they go to the end
9738        of a sorted list. */
9739     if (!hsp1 && !hsp2)
9740         return 0;
9741     else if (!hsp1)
9742         return 1;
9743     else if (!hsp2)
9744         return -1;
9745 
9746     query_end1 = hsp1->query_offset + hsp1->query_length;
9747     query_end2 = hsp2->query_offset + hsp2->query_length;
9748     subject_end1 = hsp1->subject_offset + hsp1->subject_length;
9749     subject_end2 = hsp2->subject_offset + hsp2->subject_length;
9750 
9751     if (0 == (result = BLAST_CMP(hsp2->score,          hsp1->score)) &&
9752         0 == (result = BLAST_CMP(hsp1->subject_offset, hsp2->subject_offset)) &&
9753         0 == (result = BLAST_CMP(subject_end2,         subject_end1)) &&
9754         0 == (result = BLAST_CMP(hsp1->query_offset,   hsp2->query_offset))) {
9755         /* if all other test can't distinguish the HSPs, then the final
9756            test is the result */
9757         result = BLAST_CMP(query_end2, query_end1);
9758     }
9759     return result;
9760 }
9761 /*
9762 	Move the "current_hitlist" to the BLASTResultHitlistPtr
9763 	result_hitlist.  This function should be called after a
9764 	subject sequence has been thoroughly investigated.
9765 	If a hitlist is not significant, it will be deleted.  Note that
9766 	the actual sequence is not saved.  This can be retrieved later
9767 	with readdb when the formatting is done.
9768 
9769 	The number of significant HSP's is returned.
9770 */
9771 
9772 Int4 LIBCALL
BlastSaveCurrentHitlist(BlastSearchBlkPtr search)9773 BlastSaveCurrentHitlist(BlastSearchBlkPtr search)
9774 {
9775 	BLASTResultHitlistPtr result_hitlist, PNTR results;
9776 	BLASTResultsStructPtr result_struct;
9777 	BLAST_HitListPtr current_hitlist;
9778 	BLAST_HSPPtr hsp;
9779 	BLAST_KarlinBlkPtr kbp;
9780 	BLASTResultHspPtr hsp_array;
9781 	Int4 hspcnt, index, index1, new_index, old_index, low_index, high_index;
9782 	Int4 hitlist_count, hitlist_max, hspmax, hspset_cnt, high_score=0, retval;
9783 	Nlm_FloatHi current_evalue=DBL_MAX;
9784 	Int2 deleted;
9785 	Int4 query_length;
9786 
9787 	/* AM: Query multiplexing. */
9788 	QueriesPtr mult_queries = NULL;
9789 	Uint4 current_query = 0;
9790 	MQ_ResultInfoPtr result_info = NULL;
9791 	Int4 mq_new_index=0, del_index;
9792 	BLASTResultHitlistPtr mq_worst_result = NULL;
9793 	Uint4 tmp_num_results;
9794 
9795 	if (search == NULL)
9796 		return 0;
9797 
9798 	if (search->current_hitlist == NULL || search->current_hitlist->hspcnt <= 0)	/* No hits to save. */
9799 	{
9800 		search->subject_info = BLASTSubjectInfoDestruct(search->subject_info);
9801 		return 0;
9802 	}
9803 
9804         /* AM: Support for query concatenation. */
9805 	if( !search->mult_queries )
9806 	  current_hitlist = search->current_hitlist;
9807         else
9808 	  current_hitlist = search->mult_queries->HitListArray[
9809 	    search->mult_queries->current_query];
9810 
9811 	retval = current_hitlist->hspcnt;
9812 
9813 	/* AM: Support for query concatenation. */
9814 	if( search->mult_queries && !retval ) return 0;
9815 
9816 
9817 	if (search->pbp->gapped_calculation)
9818 		kbp = search->sbp->kbp_gap[search->first_context];
9819 	else
9820 		kbp = search->sbp->kbp[search->first_context];
9821 
9822 	if (search->prog_number==blast_type_blastn)
9823 	      query_length = search->query_context_offsets[search->first_context+1] - 1;
9824 	result_hitlist = BLASTResultHitlistNew(current_hitlist->hspcnt);
9825 	if (result_hitlist != NULL)
9826 	{
9827 		result_hitlist->subject_id = search->subject_id;
9828 		result_hitlist->subject_info = search->subject_info;
9829 		search->subject_info = NULL;
9830 
9831 		hspcnt = result_hitlist->hspcnt;
9832 		hsp_array = result_hitlist->hsp_array;
9833 		index1 = 0;
9834 		hspmax = current_hitlist->hspcnt_max;
9835 
9836 		hsp = current_hitlist->hsp_array[0];
9837 		hspset_cnt = -1;
9838 
9839 		for (index=0; index<hspcnt; index++)
9840 		{
9841 			while (hsp == NULL && index1 < hspmax)
9842 			{
9843 				index1++;
9844 				hsp = current_hitlist->hsp_array[index1];
9845 			}
9846 			if (index1==hspmax) break;
9847 			if (current_evalue > hsp->evalue)
9848 				current_evalue = hsp->evalue;
9849 			if (high_score < hsp->score)
9850 				high_score = hsp->score;
9851 			hsp_array[index].ordering_method = hsp->ordering_method;
9852 			hsp_array[index].number = hsp->num;
9853 			hsp_array[index].score = hsp->score;
9854 			hsp_array[index].e_value = hsp->evalue;
9855 			hsp_array[index].num_ident = hsp->num_ident;
9856 			hsp_array[index].bit_score = ((hsp->score*kbp->Lambda) -
9857 						      kbp->logK)/NCBIMATH_LN2;
9858 			if (search->prog_number==blast_type_blastn) {
9859                            if (search->last_context > 0 &&
9860                                hsp->query.offset >=
9861                                search->query_context_offsets[search->last_context]) {
9862                               hsp->context = 1;
9863                               hsp->query.offset -=
9864                                  search->query_context_offsets[hsp->context];
9865                               hsp->query.gapped_start -=
9866                                  search->query_context_offsets[hsp->context];
9867                            }
9868 
9869 			   if (hsp->context & 1)
9870 			      hsp_array[index].query_frame = -1;
9871 			   else
9872 			      hsp_array[index].query_frame = 1;
9873 			   hsp_array[index].query_gapped_start = hsp->query.gapped_start;
9874 			   hsp_array[index].subject_gapped_start = hsp->subject.gapped_start;
9875 			} else {
9876 			   hsp_array[index].query_frame = hsp->query.frame;
9877 			   hsp_array[index].query_gapped_start = hsp->query.gapped_start;
9878 			   hsp_array[index].subject_gapped_start =
9879 			      hsp->subject.gapped_start;
9880 			}
9881 			hsp_array[index].context = hsp->context;
9882 			hsp_array[index].query_offset = hsp->query.offset;
9883 			hsp_array[index].query_length = hsp->query.length;
9884 			hsp_array[index].subject_offset = hsp->subject.offset;
9885 			hsp_array[index].subject_length = hsp->subject.length;
9886 			hsp_array[index].subject_frame = hsp->subject.frame;;
9887 			hsp_array[index].point_back = result_hitlist;
9888 
9889 			if (hsp->start_of_chain)
9890 			{	/* starting new set of HSP's, incr count.*/
9891 				hspset_cnt++;
9892 			}
9893 			hsp_array[index].hspset_cnt = hspset_cnt;
9894 
9895 			index1++;
9896 			if (index1 >= hspmax)
9897 				break;
9898 			hsp = current_hitlist->hsp_array[index1];
9899 		}
9900 		/* Check if there were less HSPs than expected */
9901 		result_hitlist->hspcnt = index1;
9902 		result_hitlist->best_evalue = current_evalue;
9903 		result_hitlist->high_score = high_score;
9904 	}
9905 
9906 /* For MP BLAST we check that no other thread is attempting to insert results. */
9907 	if (search->thr_info->results_mutex)
9908             NlmMutexLock(search->thr_info->results_mutex);
9909 
9910 /* This is the structure that is identical on every thread. */
9911 	result_struct = search->result_struct;
9912 	hitlist_count = result_struct->hitlist_count;
9913 	hitlist_max = result_struct->hitlist_max;
9914 	results = result_struct->results;
9915 
9916         /* AM: Query multiplexing. */
9917 	if( search->mult_queries )
9918 	{
9919 	  mult_queries = search->mult_queries;
9920 	  current_query = mult_queries->current_query;
9921 	  result_info = mult_queries->result_info + current_query;
9922 	}
9923 
9924 	/* Record the worst evalue for ReevaluateWithAmbiguities. */
9925 	if (hitlist_count == hitlist_max)
9926 	{
9927 		search->worst_evalue = results[hitlist_count-1]->best_evalue;
9928 	}
9929 
9930         /* New hit is less significant than all the other hits. */
9931         if (hitlist_count > 0 && (current_evalue > results[hitlist_count-1]->best_evalue ||
9932         	(current_evalue >= results[hitlist_count-1]->best_evalue &&
9933                  high_score < results[hitlist_count-1]->high_score)))
9934         {
9935                 if (hitlist_count == hitlist_max)
9936                 {       /* Array is full, delete the entry. */
9937 		  if( !mult_queries ) /* AM: Query multiplexing. */
9938                         search->current_hitlist =
9939 			   BlastHitListDestruct(search->current_hitlist);
9940                   else search->mult_queries->delete_current_hitlist = TRUE;
9941 
9942                         result_hitlist = BLASTResultHitlistFreeEx(search, result_hitlist);
9943                         if (search->thr_info->results_mutex)
9944                             NlmMutexUnlock(search->thr_info->results_mutex); /* Free mutex. */
9945                         return 0;
9946                 }
9947                 else
9948                 {
9949 		  /* AM: Query multiplexing. */
9950 		  if( !mult_queries )
9951 		        /* Add to end of array. */
9952 	    		deleted = BlastInsertList2Heap(search, result_hitlist);
9953                   else
9954 		  {
9955 		    if( result_info->NumResults
9956 		          == mult_queries->max_results_per_query )
9957                     { /* AM: No more results for this query. */
9958 		      search->mult_queries->delete_current_hitlist = TRUE;
9959                       result_hitlist
9960 		        = BLASTResultHitlistFreeEx( search, result_hitlist );
9961 
9962                       if( search->thr_info->results_mutex )
9963 		        NlmMutexUnlock( search->thr_info->results_mutex );
9964 
9965                       return 0;
9966 		    }
9967 		    else /* AM: Append to results_struct and to local. */
9968 	    	      deleted = BlastInsertList2Heap(search, result_hitlist);
9969 		  }
9970                 }
9971 
9972 	        if (deleted == 1)
9973 		{
9974                   /* AM: Query multiplexing. */
9975 		  if( mult_queries ) MQ_UpdateResultLists( mult_queries );
9976 
9977 	        	hitlist_count = result_struct->hitlist_count =
9978 			   BlastPurgeResultList(results, hitlist_count);
9979                 }
9980 	    	else if (deleted == 0)
9981 		{
9982 	      		result_hitlist = BLASTResultHitlistFreeEx(search, result_hitlist);
9983 	      		if (search->thr_info->results_mutex)
9984                             NlmMutexUnlock(search->thr_info->results_mutex);	/* Free mutex. */
9985 	      		return retval;
9986 		}
9987                 new_index = hitlist_count;
9988 
9989 		/* AM: Query multiplexing. */
9990 		if( mult_queries ) mq_new_index = result_info->NumResults;
9991         }
9992         else
9993         {
9994 	  if (hitlist_count != 0)		/* The array is all NULL's if hitlist_count==0 */
9995 	  {
9996 	    deleted = BlastInsertList2Heap(search, result_hitlist);
9997 	    if (deleted == 1)
9998 	    {
9999               /* AM: Query multiplexing. */
10000 	      if( mult_queries ) MQ_UpdateResultLists( mult_queries );
10001 
10002 	      hitlist_count = result_struct->hitlist_count =
10003 		 BlastPurgeResultList(results, hitlist_count);
10004             }
10005 	    else if (deleted == 0) {
10006 	      result_hitlist = BLASTResultHitlistFreeEx(search, result_hitlist);
10007 	      if (search->thr_info->results_mutex)
10008 		NlmMutexUnlock(search->thr_info->results_mutex);	/* Free mutex. */
10009 	      return retval;
10010 	    }
10011 	    if (hitlist_count > 0)
10012 	    {
10013 	  	  high_index=0;
10014 		  low_index=hitlist_count-1;
10015 		  new_index = (high_index+low_index)/2;
10016 		  old_index = new_index;
10017 		  for (index=0; index<BLAST_SAVE_ITER_MAX; index++)
10018 		  {
10019 			if (results[new_index]->best_evalue > current_evalue)
10020 			{
10021 			    low_index = new_index;
10022 			}
10023 			else if (results[new_index]->best_evalue < current_evalue)
10024 			{
10025 			    high_index = new_index;
10026 			}
10027 			else
10028 			{ /* If e-values are the same, use high score. */
10029 			    /* If scores are the same, use ordinal number. */
10030 			    if (results[new_index]->high_score < high_score)
10031 			      low_index = new_index;
10032 			    else if (results[new_index]->high_score > high_score)
10033 			      high_index = new_index;
10034 			    else if (results[new_index]->subject_id < search->subject_id)
10035 			      low_index = new_index;
10036 			    else
10037 			      high_index = new_index;
10038 			}
10039 
10040 			new_index = (high_index+low_index)/2;
10041 			if (old_index == new_index)
10042 			{
10043 			    if (results[new_index]->best_evalue < current_evalue)
10044 			    { /* Perform this check as new_index get rounded DOWN above.*/
10045 				new_index++;
10046 			    }
10047 			    else if (results[new_index]->best_evalue == current_evalue && results[new_index]->high_score > high_score)
10048 			    {
10049 				new_index++;
10050 			    }
10051 			    break;
10052                         }
10053 			old_index = new_index;
10054                     }
10055 
10056 		    /* AM: Query multiplexing. */
10057 		    if( !mult_queries )
10058 		    {
10059 		      if (hitlist_count == hitlist_max)
10060 		      {	/* The list is full, delete the last entry. */
10061 			  BlastFreeHeap(search, results[hitlist_max-1]);
10062 			  if (results[hitlist_max-1]->seqalign)
10063 			     SeqAlignSetFree(results[hitlist_max-1]->seqalign);
10064 			  results[hitlist_max-1] = BLASTResultHitlistFreeEx(search, results[hitlist_max-1]);
10065 			  result_struct->hitlist_count--;
10066 			  hitlist_count = result_struct->hitlist_count;
10067 		      }
10068 		      if (hitlist_max > 1)
10069 		    	  Nlm_MemMove((results+new_index+1), (results+new_index), (hitlist_count-new_index)*sizeof(results[0]));
10070                     }
10071 		    else
10072 		    {
10073 		      new_index = ResultIndex( current_evalue, high_score,
10074 		                               search->subject_id,
10075 					       results, hitlist_count );
10076 
10077 		      tmp_num_results = result_info->NumResults;
10078 		      del_index = hitlist_count;
10079 		      mq_new_index = ResultIndex( current_evalue, high_score,
10080 		                                  search->subject_id,
10081                                                   result_info->results,
10082 						  result_info->NumResults );
10083 
10084 		      if( mq_new_index == mult_queries->max_results_per_query )
10085 		      { /* AM: The list is full and new result is too low --- do nothing. */
10086 			search->mult_queries->delete_current_hitlist = TRUE;
10087                         result_hitlist
10088 		          = BLASTResultHitlistFreeEx( search, result_hitlist );
10089 
10090                         if( search->thr_info->results_mutex )
10091 		          NlmMutexUnlock( search->thr_info->results_mutex );
10092 
10093                         return 0;
10094 		      }
10095 
10096 		      if( result_info->NumResults
10097 		            == mult_queries->max_results_per_query )
10098                       { /* AM: must remove the worst result for this query. */
10099 		        mq_worst_result
10100 			  = result_info->results[result_info->NumResults - 1];
10101                         --tmp_num_results;
10102 			del_index = ResultIndex1( mq_worst_result,
10103 			                          results, hitlist_count );
10104                         BlastFreeHeap( search, results[del_index] );
10105 
10106 			if( results[del_index]->seqalign )
10107 			  SeqAlignSetFree( results[del_index]->seqalign );
10108 
10109                         results[del_index]
10110 			  = BLASTResultHitlistFreeEx( search,
10111 			                              results[del_index] );
10112                         hitlist_count = --result_struct->hitlist_count;
10113 		      }
10114 
10115 		      if( hitlist_max > 1 )
10116 		        if( new_index < del_index )
10117 		          Nlm_MemMove( results + new_index + 1,
10118 			               results + new_index,
10119 				       (del_index - new_index)
10120 				         *sizeof( results[0] ) );
10121                         else if( del_index < new_index )
10122 			  Nlm_MemMove( results + del_index,
10123 			               results + del_index + 1,
10124 				       (new_index - del_index)
10125 				         *sizeof( results[0] ) );
10126 
10127                       if( mult_queries->max_results_per_query > 1 )
10128 		        Nlm_MemMove( result_info->results + mq_new_index + 1,
10129 			             result_info->results + mq_new_index,
10130 				     (result_info->NumResults - mq_new_index)
10131 				       *sizeof( results[0] ) );
10132 
10133                       result_info->NumResults = tmp_num_results;
10134 		    }
10135 	    }
10136 	    else
10137 	    {  /* Case of K=1 and the first hit is eliminated */
10138 	    	new_index = 0;
10139 	    	BlastInsertList2Heap(search, result_hitlist);
10140 
10141               /* AM: Query multiplexing. */
10142 	      if( mult_queries ) mq_new_index = 0;
10143 	    }
10144 	  }
10145 	else
10146 	  {	/* First hit to be stored. */
10147 	    new_index = 0;
10148 	    BlastInsertList2Heap(search, result_hitlist);
10149 
10150             /* AM: Query multiplexing. */
10151 	    if( mult_queries ) mq_new_index = 0;
10152 	  }
10153 	}
10154 
10155 	if (new_index < hitlist_max)
10156         {
10157 		results[new_index] = result_hitlist;
10158 		result_struct->hitlist_count++;
10159 
10160           /* AM: Query multiplexing. */
10161 	  if( mult_queries )
10162 	  {
10163 	    result_info->results[mq_new_index] = result_hitlist;
10164 	    ++result_info->NumResults;
10165 	  }
10166 	}
10167 
10168         /* We need to sort all hits by score/e_value in results[new_index] */
10169 
10170         HeapSort(results[new_index]->hsp_array, results[new_index]->hspcnt,
10171                  sizeof(BLASTResultHsp), BLASTResultHspScoreCmp);
10172 
10173         /* --------------------------------------------------------------- */
10174 
10175 	if (search->thr_info->results_mutex)
10176             NlmMutexUnlock(search->thr_info->results_mutex);	/* Free mutex. */
10177 	return retval;
10178 }
10179 
10180 Int2
blast_set_parameters(BlastSearchBlkPtr search,Nlm_FloatHi dropoff_number_of_bits_1st_pass,Nlm_FloatHi dropoff_number_of_bits_2nd_pass,Nlm_FloatHi avglen,Nlm_FloatHi searchsp,Int4 window)10181 blast_set_parameters(BlastSearchBlkPtr search,
10182 	Nlm_FloatHi dropoff_number_of_bits_1st_pass,
10183 	Nlm_FloatHi dropoff_number_of_bits_2nd_pass,
10184 	Nlm_FloatHi	avglen, /* Average length of a sequence. */
10185 	Nlm_FloatHi	searchsp, /* total search space. */
10186 	Int4 window) /* length where two hits must be found to count. */
10187 {
10188 	BLAST_ExtendWordPtr	ewp;
10189 	BLAST_KarlinBlkPtr	kbp, kbp_gap;
10190 	BLAST_ParameterBlkPtr	pbp;
10191 	BLAST_ScoreBlkPtr	sbp;
10192 	BLAST_Score	s, s2;
10193 	BLAST_Score	dropoff_1st_pass, dropoff_2nd_pass;
10194 	Int2 index;
10195 	Int4 i; /* AM: Support for query multiplexing. */
10196 
10197 	Nlm_FloatHi meff, e, e2;
10198         Int2 last_context;
10199 
10200 	if (search == NULL)
10201 		return 1;
10202 
10203 	pbp = search->pbp;
10204 	if (pbp == NULL)
10205 		return 1;
10206 
10207 	sbp = search->sbp;
10208 	if (sbp == NULL)
10209 		return 1;
10210 
10211 	/* Do for first context only, should this be changed?? */
10212 	kbp_gap = sbp->kbp_gap[search->first_context];
10213 	kbp = sbp->kbp[search->first_context];
10214 	if (kbp == NULL && kbp_gap == NULL)
10215 		return 1;
10216 
10217         last_context = (search->prog_number == blast_type_blastn) ?
10218            search->first_context : search->last_context;
10219 	for (index=search->first_context; index<=last_context; index++)
10220 	{
10221 		ewp = search->context[index].ewp;
10222 		if (ewp == NULL && !pbp->mb_params)
10223 			return 1;
10224 
10225 	}
10226 
10227 	s = pbp->cutoff_s;
10228 	e = pbp->cutoff_e;
10229 	s2 = pbp->cutoff_s2;
10230 	e2 = pbp->cutoff_e2;
10231 	if (pbp->cutoff_s_set && !pbp->cutoff_e_set)
10232 		e = 0.;
10233 
10234 	meff = (Nlm_FloatHi) search->context[search->first_context].query->length;
10235 	if (pbp->mb_params)
10236 	   BlastCutoffs(&s, &e, kbp, searchsp, TRUE, search->pbp->gap_decay_rate );
10237 	else
10238 	{
10239 	   if (pbp->gapped_calculation)
10240      { /* AM: Changed to support query concatenation. */
10241        if( !search->mult_queries )
10242          BlastCutoffs(&s, &e, kbp_gap, searchsp, FALSE, 0.0 );
10243        else
10244          BlastCutoffs( &s, &e, kbp_gap,
10245                        search->mult_queries->MinSearchSpEff,
10246                        FALSE, 0.0 );
10247      }
10248 	   else
10249      { /* AM: Changed to support query concatenation. */
10250        if( !search->mult_queries )
10251          BlastCutoffs(&s, &e, kbp, searchsp, FALSE, 0.0 );
10252        else
10253          BlastCutoffs( &s, &e, kbp, search->mult_queries->MinSearchSpEff,
10254                         FALSE, 0.0 );
10255      }
10256 	}
10257 	/* Determine the secondary cutoff score, S2, to use */
10258 	if (e2 == 0. && !pbp->cutoff_s2_set)
10259 		s2 = s;
10260 
10261 	if ((pbp->cutoff_e2_set && !pbp->cutoff_s2_set && e2 == 0.) ||
10262 		(pbp->cutoff_s2_set && s2 > s))
10263 	{
10264 		e2 = 0., s2 = s;
10265 	}
10266 	else
10267 	{
10268 		/* e2 = MIN(e, e2); */
10269 		if (pbp->cutoff_s2_set && !pbp->cutoff_e2_set)
10270 			e2 = 0.;
10271 /*
10272 		BlastCutoffs(&s2, &e2, kbp, meff, avglen, TRUE);
10273 */
10274     if (pbp->gapped_calculation)
10275     {
10276       if( !search->mult_queries )
10277         BlastCutoffs(&s2, &e2, kbp_gap, (FloatHi) MIN(avglen,meff) * (FloatHi) avglen,
10278                      TRUE, search->pbp->gap_decay_rate );
10279       else
10280         BlastCutoffs( &s2, &e2, kbp_gap,
10281                       (FloatHi) MIN( avglen,search->mult_queries->MinLen ) * (FloatHi) avglen,
10282                       TRUE, search->pbp->gap_decay_rate );
10283     }
10284     else
10285     { /* AM: Changed to support query concatenation. */
10286       if( !search->mult_queries )
10287         BlastCutoffs(&s2, &e2, kbp, (FloatHi) MIN(avglen,meff) * (FloatHi) avglen,
10288                      TRUE, search->pbp->gap_decay_rate );
10289       else
10290         BlastCutoffs( &s2, &e2, kbp,
10291                       (FloatHi) MIN(avglen,2*(search->mult_queries->MinLen)) * (FloatHi) avglen,
10292                       TRUE, search->pbp->gap_decay_rate );
10293     }
10294 		/* Adjust s2 to be in line with s, as necessary */
10295 		s2 = MAX(s2, 1);
10296 		if (s2 > s)
10297 			s2 = s;
10298 /*
10299 		e2 = BlastKarlinStoE_simple(s2, kbp, searchsp);
10300 */
10301 	}
10302 
10303 	if (pbp->cutoff_s2_set)
10304 		pbp->cutoff_s2_max = s2;
10305 	else
10306 		pbp->cutoff_s2_max = s;
10307 
10308 	if (pbp->do_sum_stats)
10309 		pbp->cutoff_s1 = s2;
10310 	else
10311 		pbp->cutoff_s1 = s;
10312 
10313 	if (pbp->gapped_calculation && search->prog_number != blast_type_blastn)
10314 	{
10315 		pbp->gap_trigger = MIN(pbp->gap_trigger, s2);
10316 		s2 = MIN(pbp->gap_trigger, s2);
10317 	}
10318 
10319 	dropoff_1st_pass = (BLAST_Score) ceil((Nlm_FloatHi) dropoff_number_of_bits_1st_pass * NCBIMATH_LN2 / kbp->Lambda);
10320 	dropoff_1st_pass = (BLAST_Score) MIN((Nlm_FloatHi) dropoff_1st_pass, s);
10321 
10322 	/* AM: Change to support query multiplexing. */
10323 	if( search->prog_number == blast_type_tblastn && search->mult_queries )
10324 	  dropoff_2nd_pass = (BLAST_Score)ceil(
10325 	    (Nlm_FloatHi)dropoff_number_of_bits_2nd_pass*NCBIMATH_LN2
10326 	      /search->mult_queries->LambdaMin );
10327 	else
10328 	  dropoff_2nd_pass = (BLAST_Score) ceil((Nlm_FloatHi) dropoff_number_of_bits_2nd_pass * NCBIMATH_LN2 / kbp->Lambda);
10329 
10330 	dropoff_2nd_pass = (BLAST_Score) MIN((Nlm_FloatHi) dropoff_2nd_pass, s);
10331 
10332 	/* AM: Change to support query multiplexing. */
10333 	if( search->prog_number == blast_type_tblastn && search->mult_queries )
10334 	  for( i = 0; i < search->mult_queries->NumQueries; ++i )
10335 	    search->mult_queries->dropoff_2nd_pass_array[i]
10336 	      = - (BLAST_Score)ceil( (Nlm_FloatHi)dropoff_number_of_bits_2nd_pass*NCBIMATH_LN2
10337 	                             /search->mult_queries->lambda_array[i] );
10338 
10339 	/* The drop-off parameter MUST be negative. */
10340 	pbp->dropoff_1st_pass = -dropoff_1st_pass;
10341 	pbp->dropoff_2nd_pass = -dropoff_2nd_pass;
10342 	pbp->cutoff_s = s;
10343 	pbp->cutoff_e = e;
10344 	pbp->cutoff_s2 = s2;
10345 	pbp->cutoff_e2 = e2;
10346 
10347 /* The first and second pass S2 values are from formula by Stephen Altschul.*/
10348 /* If no bits were specified on the command line, then the following
10349 formula is used:
10350 	calculate ln(25000*query_length*K)/lambda
10351 
10352 	and
10353 
10354 		21(bits)*ln2/lammbda
10355 
10356 Take the smaller of those two formulas.
10357 */
10358 	if (pbp->number_of_bits == 0.0)
10359 	{
10360 		pbp->cutoff_s_first = (BLAST_Score) MIN(log((Nlm_FloatHi)(25000*(kbp->K)*(search->context[search->first_context].query->length)))/kbp->Lambda, 21*NCBIMATH_LN2/kbp->Lambda);
10361 		/* Adjust the cutoff value for translating searches. */
10362 		pbp->cutoff_s_first += (BLAST_Score)
10363                     (log((Nlm_FloatHi)search->context_factor)/kbp->Lambda);
10364 	}
10365 	else
10366 	{
10367 		pbp->cutoff_s_first = (BLAST_Score) (pbp->number_of_bits*NCBIMATH_LN2 / kbp->Lambda);
10368 	}
10369 
10370 /* This value is used only if the "old" statistics are used.  If not an
10371 individual cutoff score is calculated for each subject sequence in
10372 CalculateSecondCutoffScore. */
10373 
10374 	pbp->cutoff_s_second = s2;
10375 
10376 	/* If we're just collecting HSP's, use one cutoff. */
10377 	if (!pbp->gapped_calculation && !pbp->do_sum_stats)
10378 	{
10379 		pbp->cutoff_s2 = MAX(pbp->cutoff_s, pbp->cutoff_s2);
10380 		pbp->cutoff_s2_max = MAX(pbp->cutoff_s, pbp->cutoff_s2);
10381 	}
10382 
10383 	return 0;
10384 }
10385 
10386 /*
10387 	Arrange the HSP's (on every HitList) for linking by "link_hsps".
10388 
10389 	link_hsps requires an array of HSP's and the first member of this
10390 	array is used just to hold the HSP's (i.e., not a real HSP).
10391 
10392 	Could this all be integrated with link_hsp's??
10393 */
10394 static Int2
10395 new_link_hsps(BlastSearchBlkPtr search, BLAST_HitListPtr hitlist);
10396 
10397 Int2 LIBCALL
BlastLinkHsps(BlastSearchBlkPtr search)10398 BlastLinkHsps (BlastSearchBlkPtr search)
10399 
10400 {
10401 	BLAST_HitListPtr hitlist, orig_hitlist;
10402 	BLAST_HSPPtr hsp;
10403 	Int4 index;
10404     Int2 status = 0;
10405 
10406         /* AM: Support for query concatenation. */
10407         if( search->mult_queries && search->mult_queries->use_mq )
10408         {
10409                 orig_hitlist = search->current_hitlist;
10410                 search->current_hitlist = search->mult_queries->HitListArray[
10411                         search->mult_queries->current_query];
10412         }
10413 
10414         hitlist = search->current_hitlist;
10415 
10416 	if (hitlist && hitlist->hspcnt > 0)
10417 	{
10418         /* For ungapped blastn, assign frames to all HSPs,
10419            since this is necessary for linking, and frames have not yet been
10420            assigned. Do it only if both strands are searched. Note that
10421            we don't assign context numbers here because the offsets have
10422            not yet been adjusted to be relative to individual contexts. */
10423         if (search->prog_number == blast_type_blastn &&
10424             search->last_context > search->first_context) {
10425 
10426             for (index = 0; index < hitlist->hspcnt; ++index) {
10427                 if (hitlist->hsp_array[index]->query.offset >=
10428                     search->query_context_offsets[search->last_context]) {
10429                     hitlist->hsp_array[index]->query.frame = -1;
10430                 }
10431             }
10432         }
10433 
10434 
10435         /* Link up the HSP's for this hitlist. */
10436         if (search->pbp->longest_intron <= 0 ||
10437             (search->prog_number != blast_type_tblastn &&
10438              search->prog_number != blast_type_psitblastn &&
10439              search->prog_number != blast_type_blastx))
10440         {
10441             hsp = link_hsps(search, hitlist, hitlist->hsp_array);
10442             /* The HSP's may be in a different order than they were before,
10443                but hsp contains the first one. */
10444             for (index=0; index<hitlist->hspcnt; index++) {
10445                 hitlist->hsp_array[index] = hsp;
10446                 hsp = hsp->next;
10447             }
10448         } else {
10449             status = new_link_hsps(search, hitlist);
10450         }
10451 	}
10452 
10453         /* AM: Support for query concatenation. */
10454         if( search->mult_queries && search->mult_queries->use_mq )
10455                 search->current_hitlist = orig_hitlist;
10456 
10457 	return status;
10458 }
10459 
10460 /*
10461 	Sort the HSP's by starting position of the query.  Called by HeapSort.
10462 	The first function sorts in forward, the second in reverse order.
10463 */
10464 
10465 static int LIBCALLBACK
fwd_compare_hsps(VoidPtr v1,VoidPtr v2)10466 fwd_compare_hsps(VoidPtr v1, VoidPtr v2)
10467 
10468 {
10469 	BLAST_HSPPtr h1, h2;
10470 	BLAST_HSPPtr PNTR hp1, PNTR hp2;
10471 
10472 	hp1 = (BLAST_HSPPtr PNTR) v1;
10473 	hp2 = (BLAST_HSPPtr PNTR) v2;
10474 	h1 = *hp1;
10475 	h2 = *hp2;
10476 
10477 	if (SIGN(h1->query.frame) != SIGN(h2->query.frame))
10478 	{
10479 		if (h1->query.frame < h2->query.frame)
10480 			return 1;
10481 		else
10482 			return -1;
10483 	}
10484 	if (h1->query.offset < h2->query.offset)
10485 		return -1;
10486 	if (h1->query.offset > h2->query.offset)
10487 		return 1;
10488 	/* Necessary in case both HSP's have the same query offset. */
10489 	if (h1->subject.offset < h2->subject.offset)
10490 		return -1;
10491 	if (h1->subject.offset > h2->subject.offset)
10492 		return 1;
10493 
10494 	return 0;
10495 }
10496 
10497 static int LIBCALLBACK
rev_compare_hsps(VoidPtr v1,VoidPtr v2)10498 rev_compare_hsps(VoidPtr v1, VoidPtr v2)
10499 
10500 {
10501 	BLAST_HSPPtr h1, h2;
10502 	BLAST_HSPPtr PNTR hp1, PNTR hp2;
10503 
10504 	hp1 = (BLAST_HSPPtr PNTR) v1;
10505 	hp2 = (BLAST_HSPPtr PNTR) v2;
10506 	h1 = *hp1;
10507 	h2 = *hp2;
10508 
10509 	if (SIGN(h1->query.frame) != SIGN(h2->query.frame))
10510 	{
10511 		if (h1->query.frame > h2->query.frame)
10512 			return 1;
10513 		else
10514 			return -1;
10515 	}
10516 
10517 	if (h1->query.offset < h2->query.offset)
10518 		return  1;
10519 	if (h1->query.offset > h2->query.offset)
10520 		return -1;
10521 	if (h1->query.end < h2->query.end)
10522 		return  1;
10523 	if (h1->query.end > h2->query.end)
10524 		return -1;
10525 	if (h1->subject.offset < h2->subject.offset)
10526 		return  1;
10527 	if (h1->subject.offset > h2->subject.offset)
10528 		return -1;
10529 	if (h1->subject.end < h2->subject.end)
10530 		return  1;
10531 	if (h1->subject.end > h2->subject.end)
10532 		return -1;
10533 	return 0;
10534 }
10535 
10536 
10537 static int LIBCALLBACK
rev_compare_hsps_cfj(VoidPtr v1,VoidPtr v2)10538 rev_compare_hsps_cfj(VoidPtr v1, VoidPtr v2)
10539 
10540 {
10541 	BLAST_HSPPtr h1, h2;
10542 	BLAST_HSPPtr PNTR hp1, PNTR hp2;
10543 
10544 	hp1 = (BLAST_HSPPtr PNTR) v1;
10545 	hp2 = (BLAST_HSPPtr PNTR) v2;
10546 	h1 = *hp1;
10547 	h2 = *hp2;
10548 
10549 	if (SIGN(h1->query.frame) != SIGN(h2->query.frame))
10550 	{
10551 		if (h1->query.frame > h2->query.frame)
10552 			return -1;
10553 		else
10554 			return 1;
10555 	}
10556 
10557 	if (SIGN(h1->subject.frame) != SIGN(h2->subject.frame))
10558 	{
10559 		if (h1->subject.frame > h2->subject.frame)
10560 			return 1;
10561 		else
10562 			return -1;
10563 	}
10564 
10565 	if (h1->query.offset < h2->query.offset)
10566 		return  1;
10567 	if (h1->query.offset > h2->query.offset)
10568 		return -1;
10569 	if (h1->query.end < h2->query.end)
10570 		return  1;
10571 	if (h1->query.end > h2->query.end)
10572 		return -1;
10573 	if (h1->subject.offset < h2->subject.offset)
10574 		return  1;
10575 	if (h1->subject.offset > h2->subject.offset)
10576 		return -1;
10577 	if (h1->subject.end < h2->subject.end)
10578 		return  1;
10579 	if (h1->subject.end > h2->subject.end)
10580 		return -1;
10581 	return 0;
10582 }
10583 
SumHSPEvalue(BlastSearchBlkPtr search,BLAST_HSPPtr head_hsp,BLAST_HSPPtr hsp,Nlm_FloatHi * xsum)10584 static FloatHi SumHSPEvalue(BlastSearchBlkPtr search, BLAST_HSPPtr head_hsp,
10585                             BLAST_HSPPtr hsp, Nlm_FloatHi *xsum)
10586 {
10587    FloatHi gap_decay_rate, sum_evalue;
10588    Int4 gap_size, num, subject_length;
10589 
10590    /* AM: The following are added for query multiplexing. */
10591    Int4 effective_length, length_adjustment;
10592    Uint4 qnum;
10593    Int8 dblen_eff;
10594 
10595    if( search->mult_queries )
10596    {
10597 	qnum = GetQueryNum( search->mult_queries,
10598                             head_hsp->query.offset,
10599                             head_hsp->query.end,
10600                             head_hsp->query.frame );
10601 
10602         effective_length  = search->mult_queries->EffLengths[qnum];
10603         length_adjustment = search->mult_queries->Adjustments[qnum];
10604         dblen_eff         = search->mult_queries->DbLenEff[qnum];
10605    }
10606    else
10607    {
10608         effective_length  = search->context[search->first_context].query->effective_length;
10609         length_adjustment = search->length_adjustment;
10610         dblen_eff         = search->dblen_eff;
10611    }
10612 
10613    gap_size = search->pbp->gap_size;
10614    gap_decay_rate = search->pbp->gap_decay_rate;
10615    num = head_hsp->num + hsp->num;
10616    subject_length = MAX((search->subject->length - length_adjustment), 1);
10617 
10618    if (search->prog_number == blast_type_tblastn ||
10619        search->prog_number == blast_type_blastx ||
10620        search->prog_number == blast_type_psitblastn) {
10621      subject_length /= 3;
10622    }
10623    subject_length = MAX(subject_length, 1);
10624 
10625    *xsum = head_hsp->xsum + hsp->xsum;
10626 
10627    sum_evalue =
10628       BlastUnevenGapSumE(LINK_HSP_OVERLAP + search->pbp->gap_size + 1,
10629                          search->pbp->longest_intron + LINK_HSP_OVERLAP + 1,
10630                          num, *xsum,
10631                          effective_length,
10632                          subject_length,
10633                          dblen_eff,
10634                          BlastGapDecayDivisor(gap_decay_rate, num));
10635 
10636    return sum_evalue;
10637 }
10638 
10639 
10640 static int LIBCALLBACK
xsum_compare_hsps(VoidPtr v1,VoidPtr v2)10641 xsum_compare_hsps(VoidPtr v1, VoidPtr v2)
10642 
10643 {
10644 	BLAST_HSPPtr h1, h2;
10645 	BLAST_HSPPtr PNTR hp1, PNTR hp2;
10646 
10647 	hp1 = (BLAST_HSPPtr PNTR) v1;
10648 	hp2 = (BLAST_HSPPtr PNTR) v2;
10649 	h1 = *hp1;
10650 	h2 = *hp2;
10651 
10652     if (h1 == NULL) {
10653         return (h2 == NULL) ? 0 : 1;
10654     } else if (h2 == NULL) {
10655       return -1;
10656     }
10657 
10658     if (h1->xsum < h2->xsum)
10659         return 1;
10660     if (h1->xsum > h2->xsum)
10661         return -1;
10662 
10663     return score_compare_hsps(&h1, &h2);
10664 }
10665 
10666 
10667 /** Merges HSPs from two linked HSP sets into an array of HSPs, sorted
10668  * in increasing order of contexts and increasing order of query
10669  * offsets.
10670  * @param hsp_set1 First linked set. [in]
10671  * @param hsp_set2 Second linked set. [in]
10672  * @param merged_size The total number of HSPs in two sets. [out]
10673  * @return The array of pointers to HSPs representing a merged set.
10674  */
10675 static BLAST_HSPPtr *
BLAST_HSPMergedLinkedSet(BLAST_HSPPtr hsp_set1,BLAST_HSPPtr hsp_set2,Int4 * merged_size)10676 BLAST_HSPMergedLinkedSet(BLAST_HSPPtr hsp_set1, BLAST_HSPPtr hsp_set2,
10677                          Int4* merged_size)
10678 {
10679     Int4 index;
10680     Int4 length;
10681     BLAST_HSPPtr * merged_hsps;
10682 
10683     /* Find the first link of the old HSP chain. */
10684     while (hsp_set1->prev)
10685         hsp_set1 = hsp_set1->prev;
10686     /* Find first and last link in the new HSP chain. */
10687     while (hsp_set2->prev)
10688         hsp_set2 = hsp_set2->prev;
10689 
10690     *merged_size = length = hsp_set1->num + hsp_set2->num;
10691 
10692     if( *merged_size == 0 ) return NULL;
10693 
10694     merged_hsps = (BLAST_HSPPtr*) MemNew(length*sizeof(BLAST_HSPPtr));
10695 
10696     index = 0;
10697     while (hsp_set1 || hsp_set2) {
10698         /* NB: HSP sets for which some HSPs have identical query
10699            offsets cannot possibly be admissible, so it doesn't matter
10700            how to deal with equal offsets. */
10701         if (!hsp_set2 || (hsp_set1 &&
10702             hsp_set1->query.offset < hsp_set2->query.offset)) {
10703             merged_hsps[index] = hsp_set1;
10704             hsp_set1 = hsp_set1->next;
10705         } else {
10706             merged_hsps[index] = hsp_set2;
10707             hsp_set2 = hsp_set2->next;
10708         }
10709         ++index;
10710     }
10711 
10712     return merged_hsps;
10713 }
10714 
10715 
10716 /** Combines two linked sets of HSPs into a single set; the original
10717  *  linked sets are consumed by this operation.
10718  *
10719  * @param hsp_set1 First set of HSPs [in]
10720  * @param hsp_set2 Second set of HSPs [in]
10721  * @param sum_score The sum score of the combined linked set
10722  * @param evalue The E-value of the combined linked set
10723  * @return Combined linked set.
10724  */
10725 static BLAST_HSPPtr
BLAST_HSPCombineLinkedSets(BLAST_HSPPtr hsp_set1,BLAST_HSPPtr hsp_set2,Nlm_FloatHi sum_score,Nlm_FloatHi evalue)10726 BLAST_HSPCombineLinkedSets(BLAST_HSPPtr hsp_set1, BLAST_HSPPtr hsp_set2,
10727                            Nlm_FloatHi sum_score, Nlm_FloatHi evalue)
10728 {
10729     BLAST_HSPPtr* merged_hsps;
10730     BLAST_HSPPtr head_hsp;
10731     Int4 index, new_num;
10732 
10733     if (!hsp_set2)
10734         return hsp_set1;
10735     else if (!hsp_set1)
10736         return hsp_set2;
10737 
10738     merged_hsps = BLAST_HSPMergedLinkedSet(hsp_set1, hsp_set2, &new_num);
10739 
10740     head_hsp = merged_hsps[0];
10741     head_hsp->start_of_chain = TRUE;
10742     head_hsp->prev = NULL;
10743     for (index = 0; index < new_num; ++index) {
10744         BLAST_HSPPtr link = merged_hsps[index];
10745         if (index < new_num - 1) {
10746             BLAST_HSPPtr next_link = merged_hsps[index+1];
10747             link->next = next_link;
10748             next_link->prev = link;
10749         } else {
10750             link->next = NULL;
10751         }
10752         link->xsum = sum_score;
10753         link->evalue = evalue;
10754         link->num = new_num;
10755         link->linked_set = TRUE;
10756         if (link != head_hsp)
10757             link->start_of_chain = FALSE;
10758     }
10759 
10760     MemFree(merged_hsps);
10761     return head_hsp;
10762 }
10763 
10764 
10765 /** Given an array of HSPs (H), sorted in increasing order of query
10766  * offsets, fills an array of indices into array H such that for each
10767  * i, the index is the smallest HSP index, for which query ending
10768  * offset is >= than query ending offset of H[i]. This indexing is
10769  * performed before any of the HSPs in H are linked.
10770  *
10771  * @param hsp_array       Array HSPs [in]
10772  * @param hspcnt Size of  the hsp_array. [in]
10773  * @param qend_index_ptr  Pointer to the new array of indices.
10774  */
10775 static Int2
BLAST_HSPArrayIndexQueryEnds(BLAST_HSPPtr * hsp_array,Int4 hspcnt,Int4 ** qend_index_ptr)10776 BLAST_HSPArrayIndexQueryEnds(BLAST_HSPPtr* hsp_array, Int4 hspcnt,
10777                              Int4** qend_index_ptr)
10778 {
10779     Int4 index;
10780     Int4* qend_index_array = NULL;
10781     BLAST_HSPPtr link;
10782     Int4 current_end = 0;
10783     Int4 current_index = 0;
10784 
10785     /* Allocate the array. */
10786     *qend_index_ptr = qend_index_array =
10787         (Int4*) Nlm_Calloc(hspcnt, sizeof(Int4));
10788     if (!qend_index_array)
10789         return -1;
10790 
10791     current_end = hsp_array[0]->query.end;
10792 
10793     for (index = 1; index < hspcnt; ++index) {
10794         link = hsp_array[index];
10795         if (link->context > hsp_array[current_index]->context ||
10796             link->query.end > current_end) {
10797             current_index = index;
10798             current_end = link->query.end;
10799         }
10800         qend_index_array[index] = current_index;
10801     }
10802     return 0;
10803 }
10804 
10805 
10806 /** Find an HSP on the same context as the one given, with closest
10807  * start offset that is greater than a specified value. The list of
10808  * HSPs to search must be sorted by query offset and in increasing
10809  * order of contexts.
10810  * @param hsp_array   Array of pointers to HSPs [in]
10811  * @param size        Number of elements in the array [in]
10812  * @param context     Context of the target HSP [in]
10813  * @param offset      The target offset to search for [in]
10814  * @return The index in the array of the HSP whose start/end offset
10815  *         is closest to but >= the value 'offset'
10816  */
10817 static Int4
BLAST_HSPOffsetBinarySearch(BLAST_HSPPtr * hsp_array,Int4 size,Int4 context,Int4 offset)10818 BLAST_HSPOffsetBinarySearch(BLAST_HSPPtr* hsp_array, Int4 size,
10819                             Int4 context, Int4 offset)
10820 {
10821    Int4 index, begin, end;
10822 
10823    begin = 0;
10824    end = size;
10825    while (begin < end) {
10826       index = (begin + end) / 2;
10827 
10828       if (hsp_array[index]->context < context)
10829           begin = index + 1;
10830       else if (hsp_array[index]->context > context)
10831           end = index;
10832       else {
10833           if (hsp_array[index]->query.offset >= offset)
10834               end = index;
10835           else
10836               begin = index + 1;
10837       }
10838    }
10839 
10840    return end;
10841 }
10842 
10843 
10844 /** Find an HSP in an array sorted in increasing order of query
10845  * offsets and increasing order of contexts, with the smallest index
10846  * such that its query end is >= to a given offset.
10847  *
10848  * @param hsp_array   Array of pointers to HSPs. [in]
10849  * @param size        Number of elements in the array [in]
10850  * @param qend_index_array  Array indexing query ends in the hsp_array [in]
10851  * @param context     Context of the target HSP [in]
10852  * @param offset      The target offset to search for [in]
10853  * @return            The found index in the hsp_array.
10854  */
10855 static Int4
BLAST_HSPOffsetEndBinarySearch(BLAST_HSPPtr * hsp_array,Int4 size,Int4 * qend_index_array,Int4 context,Int4 offset)10856 BLAST_HSPOffsetEndBinarySearch(BLAST_HSPPtr* hsp_array, Int4 size,
10857                                Int4* qend_index_array, Int4 context,
10858                                Int4 offset)
10859 {
10860    Int4 begin, end;
10861 
10862    begin = 0;
10863    end = size;
10864    while (begin < end) {
10865        Int4 right_index = (begin + end) / 2;
10866        Int4 left_index = qend_index_array[right_index];
10867 
10868        if (hsp_array[right_index]->context < context)
10869            begin = right_index + 1;
10870        else if (hsp_array[right_index]->context > context)
10871            end = left_index;
10872        else {
10873            if (hsp_array[left_index]->query.end >= offset)
10874                end = left_index;
10875            else
10876                begin = right_index + 1;
10877        }
10878    }
10879 
10880    return end;
10881 }
10882 
10883 
10884 /** Checks if new candidate HSP is admissible to be linked to a set of
10885  * HSPs on the left. The new HSP must start strictly before the parent
10886  * HSP in both query and subject, and its end must lie within an
10887  * interval from the parent HSP's start, determined by the allowed gap
10888  * and overlap sizes in query and subject.  This function also
10889  * indicates whether parent is already too far to the right of the
10890  * candidate HSP, via a boolean pointer.
10891  *
10892  * @param hsp_set1        First linked set of HSPs. [in]
10893  * @param hsp_set2        Second linked set of HSPs. [in]
10894  * @param overlap_size    Amount by which HSPs are allowed to overlap. [in]
10895  * @param gap_size        Size of the maximum permitted gap in the
10896  *                        query. [in]
10897  * @param longest_intron  Size of the maximum permitted gap in the
10898  *                        sujbect. [in]
10899  * @return                Do the two sets satisfy the admissibility
10900  *                        criteria to form a combined set?
10901  */
10902 static Boolean
BLAST_HSPLinkedSetsAdmissible(BLAST_HSPPtr hsp_set1,BLAST_HSPPtr hsp_set2,Int4 overlap_size,Int4 gap_size,Int4 longest_intron)10903 BLAST_HSPLinkedSetsAdmissible(BLAST_HSPPtr hsp_set1,
10904                               BLAST_HSPPtr hsp_set2,
10905                               Int4 overlap_size, Int4 gap_size,
10906                               Int4 longest_intron)
10907 {
10908     BLAST_HSPPtr* merged_hsps;
10909     Int4 combined_size = 0;
10910     Int4 index;
10911 
10912     if (!hsp_set1 || !hsp_set2 )
10913         return FALSE;
10914 
10915     /* The first input HSP must be the head of its set. */
10916     if (hsp_set1->prev)
10917         return FALSE;
10918 
10919     /* The second input HSP may not be the head of its set. Hence
10920        follow the previous pointers to get to the head. */
10921     for ( ; hsp_set2->prev; hsp_set2 = hsp_set2->prev);
10922 
10923     /* If left and right HSP are the same, return inadmissible
10924        status. */
10925     if (hsp_set1 == hsp_set2)
10926         return FALSE;
10927 
10928     /* Check if these HSPs are for the same protein sequence (same
10929        context) */
10930     if (hsp_set1->context != hsp_set2->context)
10931         return FALSE;
10932 
10933     /* Check if new HSP and hsp_set2 are on the same nucleotide
10934        sequence strand.  (same sign of subject frame) */
10935     if (SIGN(hsp_set1->subject.frame) !=
10936         SIGN(hsp_set2->subject.frame))
10937         return FALSE;
10938 
10939     /* Merge the two sets into an array with increasing order of query
10940        offsets. */
10941     merged_hsps =
10942         BLAST_HSPMergedLinkedSet(hsp_set1, hsp_set2, &combined_size);
10943 
10944     for (index = 0; index < combined_size - 1; ++index) {
10945         BLAST_HSPPtr left_hsp = merged_hsps[index];
10946         BLAST_HSPPtr right_hsp = merged_hsps[index+1];
10947 
10948 
10949         /* If the new HSP is too far to the left from the right_hsp,
10950            indicate this by setting the boolean output value to
10951            TRUE. */
10952         if (left_hsp->query.end < right_hsp->query.offset - gap_size)
10953             break;
10954 
10955         /* Check if the left HSP's query offset is to the right of the
10956            right HSP's offset, i.e. they came in wrong order. */
10957         if (left_hsp->query.offset >= right_hsp->query.offset)
10958             break;
10959 
10960         /* Check the remaining condition for query offsets: left HSP
10961            cannot end further than the maximal allowed overlap from
10962            the right HSP's offset; and left HSP must end before the
10963            right HSP. */
10964         if (left_hsp->query.end > right_hsp->query.offset + overlap_size ||
10965             left_hsp->query.end >= right_hsp->query.end)
10966             break;
10967 
10968         /* Check the subject offsets conditions. */
10969         if (left_hsp->subject.end >
10970             right_hsp->subject.offset + overlap_size ||
10971             left_hsp->subject.end <
10972             right_hsp->subject.offset - longest_intron ||
10973             left_hsp->subject.offset >= right_hsp->subject.offset ||
10974             left_hsp->subject.end >= right_hsp->subject.end)
10975             break;
10976     }
10977 
10978     MemFree(merged_hsps);
10979 
10980     if (index < combined_size - 1)
10981         return FALSE;
10982 
10983     return TRUE;
10984 }
10985 
10986 
10987 /**
10988  * Swap the role of the query and subject within an HSP; used by
10989  * new_link_hsps to implement HSP linking for blastx using the code
10990  * for blastn.
10991  */
10992 static void
BLAST_HSPArraySwapSequences(BLAST_HSPPtr PNTR hsp_array,Int4 hspcnt)10993 BLAST_HSPArraySwapSequences(BLAST_HSPPtr PNTR hsp_array, Int4 hspcnt)
10994 {
10995     Int4 i;
10996 
10997     for(i = 0; i < hspcnt; i++ ) {
10998         BLAST_Seg seg          = hsp_array[i]->query;
10999         hsp_array[i]->query    = hsp_array[i]->subject;
11000         hsp_array[i]->subject  = seg;
11001     }
11002 }
11003 
11004 
11005 /**
11006  * Prepares an array of HSPs for linking within new_link_hsps.
11007  *
11008  * @param search      Parameters for a blast search. [in]
11009  * @param kbp_array   Array of Karlin-Altchul statitic parameters, on
11010  *                    block for each contex. [in]
11011  * @param hsp_array    Array of HSPs. [in/out]
11012  * @pamam hspcnt      Size of hsp_array. [in]
11013  */
11014 static void
new_link_hsps_setup(BlastSearchBlkPtr search,BLAST_KarlinBlkPtr * kbp_array,BLAST_HSPPtr PNTR hsp_array,Int4 hspcnt)11015 new_link_hsps_setup(BlastSearchBlkPtr search,
11016                     BLAST_KarlinBlkPtr * kbp_array,
11017                     BLAST_HSPPtr PNTR hsp_array, Int4 hspcnt)
11018 {
11019    Int4 index;
11020    BLAST_HSPPtr hsp;
11021    Nlm_FloatHi gap_decay_divisor =
11022       BlastGapDecayDivisor(search->pbp->gap_decay_rate, 1);
11023 
11024    /* Find e-values for single HSPs */
11025    s_RoundDownOddScores(search->sbp, search->current_hitlist);
11026    BlastGetNonSumStatsEvalue(search);
11027 
11028    for (index=0; index<hspcnt; index++) {
11029        hsp = hsp_array[index];
11030 
11031        hsp->num              = 1;
11032        hsp->linked_set       = FALSE;
11033        hsp->start_of_chain   = FALSE;
11034        hsp->next             = NULL;
11035        hsp->prev             = NULL;
11036 
11037        hsp->ordering_method  = 3;
11038        hsp->evalue          /= gap_decay_divisor;
11039 
11040        hsp->xsum = kbp_array[hsp->context]->Lambda * hsp->score -
11041            kbp_array[hsp->context]->logK;
11042    }
11043 }
11044 
11045 /** Greedy algorithm to link HSPs with uneven gaps.  Sorts HSPs by
11046  * score. Starting with the highest scoring HSP, finds an HSP that
11047  * produces the best sum e-value when added to the HSP set under
11048  * consideration. The neighboring HSPs in a set must have endpoints
11049  * within a window of each other on the protein axis, and within the
11050  * longest allowed intron length on the nucleotide axis. When no more
11051  * HSPs can be added to the highest scoring set, the next highest
11052  * scoring HSP is considered that is not yet part of any set.
11053  *
11054  * @param search      Paramters for a blast search. [in]
11055  * @param hitlist     A hitlist of HSPs to be linked. [in/out]
11056  * @param hsp_array   An array of HSPs to be linked (redundantly contained
11057  *                    within hitlist.) [in/out]
11058  * @returns           Status: 0 on success, -1 if bad input.
11059  */
11060 static Int2
new_link_hsps(BlastSearchBlkPtr search,BLAST_HitListPtr hitlist)11061 new_link_hsps(BlastSearchBlkPtr search, BLAST_HitListPtr hitlist)
11062 {
11063    BLAST_HSPPtr PNTR hsp_array; /* Original HSP array. */
11064    BLAST_HSPPtr PNTR score_hsp_array;  /* an array of HSPs sorted by
11065                                         decreasing score */
11066    BLAST_HSPPtr PNTR offset_hsp_array; /* an array of HSPs sorted by
11067                                         increasing query offset */
11068    BLAST_HSPPtr head_hsp;
11069    BLAST_KarlinBlkPtr PNTR kbp_array;
11070 
11071    Int4 hspcnt, index, index1;
11072    Int4 overlap_size;           /* Maximal overlap size in query or
11073                                    subject */
11074    Int4 gap_size;               /* Maximal gap size in query */
11075    Int4 longest_intron;         /* Maximum gap size in subject */
11076 
11077    Int4* qend_index_array = NULL;
11078 
11079    /* Check input arguments. */
11080    if (!search || !hitlist)
11081        return -1;
11082 
11083    hsp_array = hitlist->hsp_array;
11084 
11085    if(search->pbp->gapped_calculation) {
11086        kbp_array = search->sbp->kbp_gap;
11087    } else {
11088        kbp_array = search->sbp->kbp;
11089    }
11090    hspcnt = hitlist->hspcnt;
11091 
11092    /* Set up the HSP array to be an array of singleton sets, with
11093     * correct evalue, num and sumscore */
11094    new_link_hsps_setup(search, kbp_array, hsp_array, hspcnt);
11095 
11096    /* If there is a single HSP, don't try to link, just use the
11097     * evalue set in new_link_hsps_setup */
11098    if(hitlist->hspcnt == 1)
11099        return 0;
11100 
11101    overlap_size    = LINK_HSP_OVERLAP;
11102    gap_size        = search->pbp->gap_size;
11103    longest_intron  = search->pbp->longest_intron;
11104 
11105    if(search->prog_number == blast_type_blastx) {
11106        BLAST_HSPArraySwapSequences(hsp_array, hspcnt);
11107    }
11108 
11109    /* Allocate, fill and sort the auxiliary arrays. */
11110    score_hsp_array = (BLAST_HSPPtr PNTR) Malloc(hspcnt*sizeof(BLAST_HSPPtr));
11111    MemCpy(score_hsp_array, hsp_array, hspcnt*sizeof(BLAST_HSPPtr));
11112    HeapSort(score_hsp_array, hspcnt, sizeof(BLAST_HSPPtr), xsum_compare_hsps);
11113 
11114    offset_hsp_array = (BLAST_HSPPtr PNTR) Malloc(hspcnt*sizeof(BLAST_HSPPtr));
11115    MemCpy(offset_hsp_array, hsp_array, hspcnt*sizeof(BLAST_HSPPtr));
11116    HeapSort(offset_hsp_array, hspcnt, sizeof(BLAST_HSPPtr), fwd_compare_hsps);
11117 
11118    BLAST_HSPArrayIndexQueryEnds(offset_hsp_array, hspcnt, &qend_index_array);
11119 
11120    /* head_hsp is set to NULL whenever there is no current linked set
11121       that is being worked on. */
11122    head_hsp = NULL;
11123    for (index = 0; index < hspcnt && score_hsp_array[index]; ) {
11124        double best_evalue, best_sum_score = 0;
11125        BLAST_HSPPtr best_hsp = NULL;
11126        BLAST_HSPPtr tail_hsp = NULL;
11127        Int4 hsp_index_left, hsp_index_right;
11128        Int4 left_offset;
11129 
11130        if (!head_hsp) {
11131            /* Find the highest scoring HSP that is not yet part of a
11132               linked set.  An HSP is part of a linked set if and only
11133               if either prev or next pointer is not NULL. */
11134            while (index<hspcnt && score_hsp_array[index] &&
11135                   (score_hsp_array[index]->next ||
11136                    score_hsp_array[index]->prev))
11137                index++;
11138            if (index==hspcnt)
11139                break;
11140            head_hsp = score_hsp_array[index];
11141        }
11142        /* Find the last link in the current HSP set. */
11143        for (tail_hsp = head_hsp; tail_hsp->next; tail_hsp = tail_hsp->next);
11144 
11145        best_evalue = head_hsp->evalue;
11146        best_sum_score = head_hsp->xsum;
11147        /* left_offset is the leftmost point where an HSP can end to be
11148           admissible for linking with head_hsp. */
11149        left_offset = head_hsp->query.offset - gap_size;
11150 
11151        /* Find the smallest index in the offset array, for which an
11152           HSP can possibly be added to the set currently being
11153           explored. */
11154        hsp_index_left =
11155            BLAST_HSPOffsetEndBinarySearch(offset_hsp_array,
11156                                           hspcnt, qend_index_array,
11157                                           head_hsp->context, left_offset);
11158 
11159        /* Find the largest index in the offset array, for which an HSP
11160           can be possibly added to the currently explored set. */
11161        hsp_index_right =
11162            BLAST_HSPOffsetBinarySearch(offset_hsp_array, hspcnt,
11163                                        tail_hsp->context,
11164                                        tail_hsp->query.end + gap_size);
11165 
11166        for (index1 = hsp_index_left; index1 < hsp_index_right; ++index1) {
11167            BLAST_HSPPtr lhsp = offset_hsp_array[index1];
11168 
11169            /* From each previously linked HSP set consider only one
11170               representative - the leftmost HSP whose query end is >=
11171               left_offset. */
11172            if (lhsp->prev && lhsp->prev->query.end >= left_offset)
11173                continue;
11174 
11175            if (BLAST_HSPLinkedSetsAdmissible(head_hsp, lhsp,
11176                                              overlap_size, gap_size,
11177                                              longest_intron)) {
11178                double evalue, sum_score;
11179                /* Check if the e-value for the new combined HSP set is
11180                   better than for the previously obtained set. */
11181                if ((evalue = SumHSPEvalue(search, head_hsp,
11182                                           lhsp, &sum_score)) <
11183                    MIN(best_evalue, lhsp->evalue)) {
11184                    best_hsp = lhsp;
11185                    best_evalue = evalue;
11186                    best_sum_score = sum_score;
11187                }
11188            }
11189        }
11190 
11191       /* Link the new HSP to the set, if it qualified. */
11192       if (best_hsp) {
11193          head_hsp = BLAST_HSPCombineLinkedSets(head_hsp, best_hsp,
11194                                                best_sum_score, best_evalue);
11195       } else {
11196          head_hsp = NULL;
11197          ++index;
11198       }
11199    }
11200 
11201    /* Free the auxiliary arrays. */
11202    MemFree(score_hsp_array);
11203    MemFree(offset_hsp_array);
11204    MemFree(qend_index_array);
11205 
11206    if(search->prog_number == blast_type_blastx) {
11207        BLAST_HSPArraySwapSequences(hsp_array, hspcnt);
11208    }
11209 
11210    /* Make sure that HSPs are sorted by individual score at exit. */
11211    HeapSort(hitlist->hsp_array, hitlist->hspcnt,
11212             sizeof(BLAST_HSPPtr), score_compare_hsps);
11213 
11214    return 0;
11215 }
11216 
11217 
11218 /*
11219 	This function orders and "links" the HSP's.  It does this
11220 	by first ordering them backwards (with "rev_compare_hsps") and
11221 	then (as the function moves forwards through the list of HSP's)
11222 	comparing them with the previous HSP's. They then end up
11223 	in the "correct" order.
11224 
11225 	The HSP hp_start is used as a "hook" into the chain of HSP's.
11226 	As HSP's are assigned to a set, they are removed from the linked
11227 	list, and further consideration.  hp_start always points to the first
11228 	"real" HSP remaining.
11229 
11230 	Two attempts are made to order the HSP's:
11231 	one has a maximum gap ("gap"), the other has no maximum.
11232 
11233 	This function works with the HSP's resulting from one query
11234 	sequence and one subject sequence.
11235 
11236 	******* Comments -cfj
11237 	This function is, in the worst case, O(N^3) in the number of hsps, and often becomes
11238         the main bottleneck on large query strings.
11239 	I've made a bunch of changes to try to speed up this function (by constant factors),
11240 	while still producing the identical output.
11241 
11242 	******* major changes -cfj
11243 	- Use separate search (ie. separate frame loops) for each subject frame sign.
11244 	- use lh_helper array to store most commonly used info.  This helps by ordering
11245           accesses, and keeping them close.
11246 	- For index=0, we can break out of loop when q_off gets large enough.
11247 	- For index=1, we can break out of loop when remaining maxsum is small enough.
11248 	- Keep track of which hsps are 'linked-to'.  When we pull out the high scoring
  	  chain, if none of the removed hsp's are linked to by someone outside the chain,
11250 	  then no other hsp's score will change, so we don't need to recompute scores, just
11251 	  find the new largest.
11252         - if max-path was unchanged, it is still the max, so don't recompute all scores
11253 	- dynamically keep track of next hsp in chain with a larger score, this allows us to
11254 	    jump over low scores when walking down list
	- When computing the score for each hsp, if the linked_to (from last iter) has not changed,
11256 	  then this item is unchanged also.
11257 	********
11258 */
11259 
11260 static BLAST_HSPPtr
link_hsps(BlastSearchBlkPtr search,BLAST_HitListPtr hitlist,BLAST_HSPPtr PNTR hsp_array)11261 link_hsps(BlastSearchBlkPtr search, BLAST_HitListPtr hitlist, BLAST_HSPPtr PNTR hsp_array)
11262 {
11263 	BLAST_HSPPtr H, H2, best[2], first_hsp, last_hsp, hp_frame_start[3*2];
11264 	BLAST_HSP hp_start;
11265 	BLAST_KarlinBlkPtr PNTR kbp;
11266 	BLAST_Score maxscore, cutoff[2];
11267 	Boolean frame_change, linked_set, ignore_small_gaps;
11268 	Nlm_FloatHi gap_decay_rate, gap_prob, prob[2];
11269 	Int4 index, index1, ordering_method, num_links, frame_index, number_of_query_frames;
11270 	Int4 hp_frame_number[3*2];
11271 	Int4 start_range_size; /* Number of positions at which next HSP can
11272                                   start around the end point of the previous HSP. */
11273         Int4 subject_length, number_of_hsps, total_number_of_hsps;
11274 	VoidPtr link;
11275 	Int4 H2_index,H_index;
11276 	Int4 i;
11277 	Int4 max_q_diff=0;
11278  	Int4 path_changed;  /* will be set if an element is removed that may change an existing path */
11279  	Int4 first_pass, use_current_max;
11280 	LinkHelpStruct *lh_helper=0;
11281 	Uint4 query_num=0; /* AM: to support query concatenation. */
11282         const Int4 overlap_size = LINK_HSP_OVERLAP; /* Maximal allowed overlap
11283                                                        between to successive
11284                                                        HSPs in a linked set. */
11285         Int4 trim_size = (overlap_size+1)/2; /* Distance by which HSPs are
11286                                                 trimmed to remove the potential
11287                                                 overlap. */
11288 
11289 	if (search == NULL || hitlist == NULL)
11290 		return NULL;
11291 
11292         /* enlarge helper array if necessary */
11293         if (hitlist->lh_helper_size <= (hitlist->hspcnt+5)){
11294 	  hitlist->lh_helper_size = MAX(1024,hitlist->hspcnt+5);
11295 	  MemFree(hitlist->lh_helper);
11296 	  hitlist->lh_helper = (LinkHelpStruct *) MemNew(sizeof(LinkHelpStruct)*hitlist->lh_helper_size);
11297 	}
11298 	lh_helper= hitlist->lh_helper;
11299 
11300 	if (search->pbp->gapped_calculation)
11301 	{
11302 		kbp = search->sbp->kbp_gap;
11303 	}
11304 	else
11305 	{
11306 		kbp = search->sbp->kbp;
11307 	}
11308 
11309 	total_number_of_hsps = hitlist->hspcnt;
11310 
11311 	/* AM: Support for query concatenation */
11312 	if( !search->mult_queries )
11313 	  subject_length = MAX((search->subject->length - search->length_adjustment), 1);
11314         else
11315 	{
11316 	  query_num = GetQueryNum( search->mult_queries, hsp_array[0]->query.offset,
11317 	                           hsp_array[0]->query.end, hsp_array[0]->query.frame );
11318 	  subject_length = MAX((search->subject->length
11319 	                        - search->mult_queries->Adjustments[query_num]), 1);
11320 	}
11321 
11322         if (StringCmp(search->prog_name, "tblastn") == 0
11323             || StringCmp(search->prog_name, "tblastx") == 0
11324             ||StringCmp(search->prog_name, "psitblastn") == 0)
11325 	{
11326 		subject_length /= 3;
11327 	}
11328 	subject_length = MAX(subject_length, 1);
11329 	number_of_hsps = total_number_of_hsps;
11330 	start_range_size = search->pbp->gap_size + overlap_size + 1;
11331 	gap_prob = search->pbp->gap_prob;
11332 	gap_decay_rate = search->pbp->gap_decay_rate;
11333 /* Sort by (reverse) position. */
11334 	HeapSort(hsp_array,total_number_of_hsps,sizeof(BLAST_HSPPtr), rev_compare_hsps_cfj);
11335 
11336 	cutoff[0] = search->pbp->cutoff_s_second;
11337 	cutoff[1] = search->pbp->cutoff_big_gap;
11338 	ignore_small_gaps = search->pbp->ignore_small_gaps;
11339 
11340 	if (StringICmp(search->prog_name, "blastn") == 0 || StringICmp(search->prog_name, "blastx") == 0 || StringICmp(search->prog_name, "tblastx") == 0)
11341 	{
11342 		number_of_query_frames = 2;
11343 	}
11344 	else
11345 	{
11346 		number_of_query_frames = 1;
11347 	}
11348 
11349 /* hook up the HSP's */
11350 	hp_frame_start[0] = hsp_array[0];
11351 	hp_frame_number[0] = hp_frame_number[1] = 0;
11352 	hp_frame_number[2] = hp_frame_number[3] = 0;
11353 	frame_change = FALSE;
11354 
11355 
11356 	/* Put entries with different frame parity into separate 'query_frame's. -cfj */
11357 	{
11358 	  Int4 cur_frame=0;
11359 	     for (index=0;index<number_of_hsps;index++)
11360 	     {
11361 		H=hsp_array[index];
11362 		hp_frame_number[cur_frame]++;
11363 
11364 		H->prev= index ? hsp_array[index-1] : NULL;
11365 		H->next= index<(number_of_hsps-1) ? hsp_array[index+1] : NULL;
11366 		if (H->prev != NULL &&
11367 		    ( (SIGN(H->query.frame) != SIGN(H->prev->query.frame))
11368 		       || (SIGN(H->subject.frame) != SIGN(H->prev->subject.frame))
11369 
11370 		      ))
11371 		{ /* If frame switches, then start new list. */
11372 		       hp_frame_number[cur_frame]--;
11373 		       hp_frame_number[++cur_frame]++;
11374 			hp_frame_start[cur_frame] = H;
11375 			H->prev->next = NULL;
11376 			H->prev = NULL;
11377 			frame_change = TRUE;
11378 		}
11379 	     }
11380 	     number_of_query_frames = cur_frame+1;
11381 	}
11382 
11383 	/* max_q_diff is the maximum amount q.offset can differ from q.offset_trim */
11384 	/* This is used to break out of H2 loop early */
11385 	if (search->pbp->old_stats == FALSE)
11386 	{
11387 	    for (index=0;index<number_of_hsps;index++)
11388 	    {
11389 		H=hsp_array[index];
11390 		H->query.offset_trim = H->query.offset +
11391                    MIN(((H->query.length)/4), trim_size);
11392 		H->query.end_trim = H->query.end -
11393                    MIN(((H->query.length)/4), trim_size);
11394 		H->subject.offset_trim = H->subject.offset +
11395                    MIN(((H->subject.length)/4), trim_size);
11396 		H->subject.end_trim = H->subject.end -
11397                    MIN(((H->subject.length)/4), trim_size);
11398 	     }
11399 	    max_q_diff = trim_size;
11400 	}
11401 	else
11402 	{
11403 	    for (index=0;index<number_of_hsps;index++)
11404 	    {
11405 		H=hsp_array[index];
11406 		H->query.offset_trim = H->query.offset + (H->query.length)/8;
11407 		H->query.end_trim = H->query.end - (H->query.length)/8;
11408 		H->subject.offset_trim = H->subject.offset + (H->subject.length)/8;
11409 		H->subject.end_trim = H->subject.end - (H->subject.length)/8;
11410 		max_q_diff=MAX(max_q_diff,(H->query.length/8));
11411 	     }
11412 	}
11413 
11414 	for (frame_index=0; frame_index<number_of_query_frames; frame_index++)
11415 	{
11416 	     MemFill(&hp_start, 0, sizeof(hp_start));
11417 	     hp_start.next = hp_frame_start[frame_index];
11418 	     hp_frame_start[frame_index]->prev = &hp_start;
11419 	     number_of_hsps = hp_frame_number[frame_index];
11420 
11421 	  lh_helper[0].ptr = &hp_start;
11422 	  lh_helper[0].q_off_trim = 0;
11423 	  lh_helper[0].s_off_trim = 0;
11424 	  lh_helper[0].maxsum1  = -10000;
11425 	  lh_helper[0].next_larger  = 0;
11426 
11427 	  /* lh_helper[0]  = empty     = end marker that I added
11428 	   * lh_helper[1]  = hsp_start = empty entry used in original code
11429 	   * lh_helper[2]  = hsp_array->next = hsp_array[0]
11430 	   * lh_helper[i]  = ... = hsp_array[i-2] (for i>=2)
11431 	   */
11432 	  first_pass=1;    /* do full search */
11433 	  path_changed=1;
11434 	  for (H=hp_start.next; H!=NULL; H=H->next)
11435 	    H->hsp_link.changed=1;
11436 
11437 	     while (number_of_hsps > 0)
11438 	     {
11439 	       Int4 last[3];
11440 	       Int4 max[3];
11441 	       last[0]=last[1]=last[2]=0;
11442 	       max[0]=max[1]=max[2]=-10000;
11443 	       /* Initialize the 'best' parameter */
11444 	       best[0] = best[1] = NULL;
11445 
11446 
11447 	       /* See if we can avoid recomputing all scores:
11448 		*  - Find the max paths (based on old scores).
11449 		*  - If no paths were changed by removal of nodes (ie research==0)
11450 		*    then these max paths are still the best.
11451 		*  - else if these max paths were unchanged, then they are still the best.
11452 		*/
11453 	        use_current_max=0;
11454 		if (!first_pass){
11455 		  Int4 max0,max1;
11456 		  /* Find the current max sums */
11457 		  if(!ignore_small_gaps){
11458 		    max0 = -cutoff[0];
11459 		    max1 = -cutoff[1];
11460 		    for (H=hp_start.next; H!=NULL; H=H->next) {
11461 		      Int4 sum0=H->hsp_link.sum[0];
11462 		      Int4 sum1=H->hsp_link.sum[1];
11463 		      if(sum0>=max0)
11464 		      {
11465 			max0=sum0;
11466 			best[0]=H;
11467 		      }
11468 		      if(sum1>=max1)
11469 		      {
11470 			max1=sum1;
11471 			best[1]=H;
11472 		      }
11473 		    }
11474 		  }else{
11475 		    maxscore = -cutoff[1];
11476 		    for (H=hp_start.next; H!=NULL; H=H->next) {
11477 		      Int4  sum=H->hsp_link.sum[1];
11478 		      if(sum>=maxscore)
11479 		      {
11480 			maxscore=sum;
11481 			best[1]=H;
11482 		      }
11483 		    }
11484 		  }
11485 		  if(path_changed==0){
11486 		    /* No path was changed, use these max sums. */
11487 		    use_current_max=1;
11488 		  }
11489 		  else{
11490 		    /* If max path hasn't chaged, we can use it */
11491 		    /* Walk down best, give up if we find a removed item in path */
11492 		    use_current_max=1;
11493 		    if(!ignore_small_gaps){
11494 		      for (H=best[0]; H!=NULL; H=H->hsp_link.link[0])
11495 			if (H->linked_to==-1000) {use_current_max=0; break;}
11496 		    }
11497 		    if(use_current_max)
11498 		      for (H=best[1]; H!=NULL; H=H->hsp_link.link[1])
11499 			if (H->linked_to==-1000) {use_current_max=0; break;}
11500 
11501 		  }
11502 		}
11503 
11504 	       /* reset helper_info */
11505 	       /* Inside this while loop, the linked list order never changes
11506 		* So here we initialize an array of commonly used info,
11507 		* and in this loop we access these arrays instead of the actual list
11508 		*/
11509 		if(!use_current_max){
11510 		for (H=&hp_start,H_index=1; H!=NULL; H=H->next,H_index++) {
11511 		  Int4 s_frame = H->subject.frame;
11512 		  Int4 s_off_t = H->subject.offset_trim;
11513 		  Int4 q_off_t = H->query.offset_trim;
11514 		  lh_helper[H_index].ptr = H;
11515 		  lh_helper[H_index].q_off_trim = q_off_t;
11516 		  lh_helper[H_index].s_off_trim = s_off_t;
11517 		  for(i=0;i<BLAST_NUMBER_OF_ORDERING_METHODS;i++)
11518 		    lh_helper[H_index].sum[i] = H->hsp_link.sum[i];
11519 		  /* lh_helper[H_index].s_frame = SIGN(s_frame);
11520 		   * lh_helper[H_index].prev_same = last[SIGN(s_frame)+1];
11521 		   * last[SIGN(s_frame)+1]=H_index;
11522 		   */
11523 		  max[SIGN(s_frame)+1]=MAX(max[SIGN(s_frame)+1],H->hsp_link.sum[1]);
11524 		  lh_helper[H_index].maxsum1 =max[SIGN(s_frame)+1];
11525 
11526 		  /* set next_larger to link back to closest entry with a sum1 larger than this */
11527 		  {
11528 		    Int4 cur_sum=lh_helper[H_index].sum[1];
11529 		    Int4 prev = H_index-1;
11530 		    Int4 prev_sum = lh_helper[prev].sum[1];
11531 		    while((cur_sum>=prev_sum) && (prev>0)){
11532 		      prev=lh_helper[prev].next_larger;
11533 		      prev_sum = lh_helper[prev].sum[1];
11534 		    }
11535 		    lh_helper[H_index].next_larger = prev;
11536 		  }
11537 		  H->linked_to = 0;
11538 		}
11539 
11540 		lh_helper[1].maxsum1 = -10000;
11541 
11542 
11543 		/****** loop iter for index = 0  **************************/
11544 	     	if(!ignore_small_gaps)
11545 		{
11546 		  index=0;
11547 		     maxscore = -cutoff[index];
11548 		     H_index = 2;
11549 		     for (H=hp_start.next; H!=NULL; H=H->next,H_index++)
11550 		     {
11551 		        Int4 H_hsp_num=0;
11552 			Int4 H_hsp_sum=0;
11553 			Nlm_FloatHi H_hsp_xsum=0.0;
11554 			VoidPtr H_hsp_link=NULL;
11555 			if (H->score > cutoff[index]) {
11556 			  Int4 H_query_etrim = H->query.end_trim;
11557 			  Int4 H_sub_etrim = H->subject.end_trim;
11558 			  Int4 H_q_et_gap = H_query_etrim+start_range_size;
11559 			  Int4 H_s_et_gap = H_sub_etrim+start_range_size;
11560 
11561 			  /* We only walk down hits with the same frame sign */
11562 			  /* for (H2=H->prev; H2!=NULL; H2=H2->prev,H2_index--) */
11563 			  for (H2_index=H_index-1; H2_index>1; H2_index=H2_index-1)
11564 			    {
11565 			        Int4 b1,b2,b4,b5;
11566 				Int4 q_off_t,s_off_t,sum;
11567 
11568 				/* s_frame = lh_helper[H2_index].s_frame; */
11569 				q_off_t = lh_helper[H2_index].q_off_trim;
11570 				s_off_t = lh_helper[H2_index].s_off_trim;
11571 
11572 				/* combine tests to reduce mispredicts -cfj */
11573 				b1 = q_off_t <= H_query_etrim;
11574 				b2 = s_off_t <= H_sub_etrim;
11575 				/* b3 = s_frame - H_sign_sframe; */
11576 				sum = lh_helper[H2_index].sum[index];
11577 
11578 
11579 				b4 = ( q_off_t > H_q_et_gap ) ;
11580 				b5 = ( s_off_t > H_s_et_gap ) ;
11581 
11582                                /* list is sorted by q_off, so q_off should only increase.
11583                                 * q_off_t can only differ from q_off by max_q_diff
11584                                 * So once q_off_t is large enough (ie it exceeds limit
11585                                 * by max_q_diff), we can stop.  -cfj
11586                                 */
11587 				  if(q_off_t > (H_q_et_gap+max_q_diff))
11588 				  break;
11589 
11590 				if (b1|b2|b5|b4) continue;
11591 
11592 
11593 				if (sum>H_hsp_sum)
11594 				{
11595 				        H2=lh_helper[H2_index].ptr;
11596 					H_hsp_num=H2->hsp_link.num[index];
11597 					H_hsp_sum=H2->hsp_link.sum[index];
11598 					H_hsp_xsum=H2->hsp_link.xsum[index];
11599 					H_hsp_link=H2;
11600 
11601 				}
11602 			    } /* end for H2... */
11603 			}
11604 			{
11605               BLAST_Score score=H->score;
11606               Nlm_FloatHi new_xsum =
11607                   H_hsp_xsum +
11608                   (score*(kbp[H->context]->Lambda)) - kbp[H->context]->logK;
11609               Int4 new_sum = H_hsp_sum + (score - cutoff[index]);
11610 
11611 			  H->hsp_link.sum[index] = new_sum;
11612 			  H->hsp_link.num[index] = H_hsp_num+1;
11613 			  H->hsp_link.link[index] = H_hsp_link;
11614 			  lh_helper[H_index].sum[index] = new_sum;
11615 			  if (new_sum >= maxscore)
11616 			    {
11617 			 	maxscore=new_sum;
11618 				best[index]=H;
11619 		    }
11620 			  H->hsp_link.xsum[index] = new_xsum;
11621 			  if(H_hsp_link)
11622 			    ((BLAST_HSPPtr)H_hsp_link)->linked_to++;
11623 			}
11624 		     } /* end for H=... */
11625 		}
11626 		/****** loop iter for index = 1  **************************/
11627 		  index=1;
11628 		     maxscore = -cutoff[index];
11629 		     H_index = 2;
11630 		     for (H=hp_start.next; H!=NULL; H=H->next,H_index++)
11631 		     {
11632 		        Int4 H_hsp_num=0;
11633 			Int4 H_hsp_sum=0;
11634 			Nlm_FloatHi H_hsp_xsum=0.0;
11635 			VoidPtr H_hsp_link=NULL;
11636 
11637 			H->hsp_link.changed=1;
11638 			H2 = H->hsp_link.link[index];
11639 			if ( (!first_pass) &&
11640 			     ( (H2==0) || (H2->hsp_link.changed==0) )
11641 			   )
11642 			  {
11643 			    /* If The best choice last time has not been changed, then it is still the
11644 			       best choice, so no need to walk down list. */
11645 			    if(H2){
11646 			      H_hsp_num=H2->hsp_link.num[index];
11647 			      H_hsp_sum=H2->hsp_link.sum[index];
11648 			      H_hsp_xsum=H2->hsp_link.xsum[index];
11649 			    }
11650 			    H_hsp_link=H2;
11651 			    H->hsp_link.changed=0;
11652 			  } else
11653 			if (H->score > cutoff[index]) {
11654 			  Int4 H_query_etrim = H->query.end_trim;
11655 			  Int4 H_sub_etrim = H->subject.end_trim;
11656 
11657 			  /* Here we look at what was the best choice last time (if it's still around)
11658 			   * and set this to the initial choice.  By setting the best score to
11659 			   * a (potentially) large value initially, we can reduce the number of
11660 			   * hsps checked.  -cfj
11661 			   */
11662 
11663 			  /* Currently we set the best score to a value just less than the real value. This
11664 			   * is not really necessary, but doing this ensures that in the case of a tie, we
11665 			   * make the same selection the original code did.
11666 			   */
11667 
11668 #if 1
11669 			  if(!first_pass&&H2&&H2->linked_to>=0){
11670 			    if(1){
11671 			      /* We set this to less than the real value to keep the original ordering
11672 			       * in case of ties. */
11673 			      H_hsp_sum=H2->hsp_link.sum[index]-1;
11674 			    }else{
11675 			      H_hsp_num=H2->hsp_link.num[index];
11676 			      H_hsp_sum=H2->hsp_link.sum[index];
11677 			      H_hsp_xsum=H2->hsp_link.xsum[index];
11678 			      H_hsp_link=H2;
11679 			    }
11680 			  }
11681 #endif
11682 
11683 			  /* We now only walk down hits with the same frame sign */
11684 			  /* for (H2=H->prev; H2!=NULL; H2=H2->prev,H2_index--) */
11685 			  for (H2_index=H_index-1; H2_index>1;)
11686 			    {
11687 			        Int4 b0,b1,b2;
11688 				Int4 q_off_t,s_off_t,sum,next_larger;
11689 				LinkHelpStruct * H2_helper=&lh_helper[H2_index];
11690 				sum = H2_helper->sum[index];
11691 				next_larger = H2_helper->next_larger;
11692 
11693 				s_off_t = H2_helper->s_off_trim;
11694 				q_off_t = H2_helper->q_off_trim;
11695 
11696 				b0 = sum <= H_hsp_sum;
11697 
11698 				/* Compute the next H2_index */
11699 				H2_index--;
11700 				if(b0){	 /* If this sum is too small to beat H_hsp_sum, advance to a larger sum */
11701 				  H2_index=next_larger;
11702 				}
11703 
11704 				/* combine tests to reduce mispredicts -cfj */
11705 				b1 = q_off_t <= H_query_etrim;
11706 				b2 = s_off_t <= H_sub_etrim;
11707 
11708 				if(0) if(H2_helper->maxsum1<=H_hsp_sum)break;
11709 
11710 				if (!(b0|b1|b2) )
11711 				{
11712   				        H2 = H2_helper->ptr;
11713 
11714 					H_hsp_num=H2->hsp_link.num[index];
11715 					H_hsp_sum=H2->hsp_link.sum[index];
11716 					H_hsp_xsum=H2->hsp_link.xsum[index];
11717 					H_hsp_link=H2;
11718 				}
11719 
11720 			    } /* end for H2_index... */
11721 			} /* end if(H->score>cuttof[]) */
11722 			{
11723 			  BLAST_Score score=H->score;
11724               Nlm_FloatHi new_xsum =
11725                   H_hsp_xsum +
11726                   (score*(kbp[H->context]->Lambda)) - kbp[H->context]->logK;
11727               Int4 new_sum = H_hsp_sum + (score - cutoff[index]);
11728 
11729 			  H->hsp_link.sum[index] = new_sum;
11730 			  H->hsp_link.num[index] = H_hsp_num+1;
11731 			  H->hsp_link.link[index] = H_hsp_link;
11732 			  lh_helper[H_index].sum[index] = new_sum;
11733 			  lh_helper[H_index].maxsum1 = MAX(lh_helper[H_index-1].maxsum1, new_sum);
11734 			  /* Update this entry's 'next_larger' field */
11735 			  {
11736 			    Int4 cur_sum=lh_helper[H_index].sum[1];
11737 			    Int4 prev = H_index-1;
11738 			    Int4 prev_sum = lh_helper[prev].sum[1];
11739 			    while((cur_sum>=prev_sum) && (prev>0)){
11740 			      prev=lh_helper[prev].next_larger;
11741 			      prev_sum = lh_helper[prev].sum[1];
11742 			    }
11743 			    lh_helper[H_index].next_larger = prev;
11744 			  }
11745 
11746 			  if (new_sum >= maxscore)
11747 			    {
11748 			 	maxscore=new_sum;
11749 				best[index]=H;
11750 			    }
11751 			  H->hsp_link.xsum[index] = new_xsum;
11752 			  if(H_hsp_link)
11753 			    ((BLAST_HSPPtr)H_hsp_link)->linked_to++;
11754 			}
11755 		     }
11756 		path_changed=0;
11757 		first_pass=0;
11758 		}
11759 
11760 		if (search->pbp->old_stats == FALSE && search->pbp->use_large_gaps == FALSE)
11761 		{
11762 		  if (!ignore_small_gaps)
11763 		  {
11764 		    /* Select the best ordering method.
11765 		    First we add back in the value cutoff[index] * the number
11766 		    of links, as this was subtracted out for purposes of the
11767 		    comparison above. */
11768 		    best[0]->hsp_link.sum[0] += (best[0]->hsp_link.num[0])*cutoff[0];
11769 
11770 		    /* AM: Support for query concatenation. */
11771             if( best[0]->hsp_link.num[0] > 1 && gap_prob == 0 ) {
11772               prob[0] = INT4_MAX;
11773             } else {
11774               if( !search->mult_queries )
11775                 prob[0] =
11776                   BlastSmallGapSumE(start_range_size,
11777                                     best[0]->hsp_link.num[0],
11778                                     best[0]->hsp_link.xsum[0],
11779                                     search->context[search->first_context].
11780                                     query->effective_length,
11781                                     subject_length,
11782                                     search->dblen_eff,
11783                                     BlastGapDecayDivisor(gap_decay_rate,
11784                                                          best[0]->
11785                                                          hsp_link.num[0]));
11786               else
11787                 prob[0] =
11788                   BlastSmallGapSumE( start_range_size,
11789                                      best[0]->hsp_link.num[0],
11790                                      best[0]->hsp_link.xsum[0],
11791                                      search->mult_queries->
11792                                      EffLengths[query_num],
11793                                      subject_length,
11794                                      search->mult_queries->
11795                                      DbLenEff[query_num],
11796                                      BlastGapDecayDivisor(gap_decay_rate,
11797                                                           best[0]->
11798                                                           hsp_link.num[0]));
11799               if( best[0]->hsp_link.num[0] > 1 ) {
11800                 prob[0] /= gap_prob;
11801                 if( prob[0] > INT4_MAX ) prob[0] = INT4_MAX;
11802               }
11803             }
11804 
11805 		    best[1]->hsp_link.sum[1] += (best[1]->hsp_link.num[1])*cutoff[1];
11806 
11807 		    /* AM: Support for query concatenation. */
11808             if( 1 - gap_prob == 0.0 && best[1]->hsp_link.num[1] > 1 ) {
11809               prob[1] = INT4_MAX;
11810             } else{
11811               if( !search->mult_queries )
11812                 prob[1] =
11813                   BlastLargeGapSumE(best[1]->hsp_link.num[1],
11814                                     best[1]->hsp_link.xsum[1],
11815                                     search->context[search->first_context].
11816                                     query->effective_length,
11817                                     subject_length,
11818                                     search->dblen_eff,
11819                                     BlastGapDecayDivisor(gap_decay_rate,
11820                                                          best[1]->
11821                                                          hsp_link.num[1]));
11822               else
11823                 prob[1] =
11824                   BlastLargeGapSumE( best[1]->hsp_link.num[1],
11825                                      best[1]->hsp_link.xsum[1],
11826                                      search->mult_queries->
11827                                      EffLengths[query_num],
11828                                      subject_length,
11829                                      search->mult_queries->
11830                                      DbLenEff[query_num],
11831                                      BlastGapDecayDivisor(gap_decay_rate,
11832                                                           best[1]->
11833                                                           hsp_link.num[1]));
11834 
11835               if( best[1]->hsp_link.num[1] > 1 ) {
11836                 prob[1] /= 1 - gap_prob;
11837                 if( prob[1] > INT4_MAX ) prob[1] = INT4_MAX;
11838               }
11839             }
11840 		    ordering_method = prob[0]<=prob[1] ? 0:1;
11841 		  }
11842 		  else
11843 		  {
11844 		    /* We only consider the case of big gaps. */
11845 		    best[1]->hsp_link.sum[1] += (best[1]->hsp_link.num[1])*cutoff[1];
11846 		    /* gap_prob=0 here as small gaps are NOT considered. */
11847 
11848 		    /* AM: Support for query concatenation. */
11849             if( !search->mult_queries )
11850               prob[1] =
11851                 BlastLargeGapSumE(best[1]->hsp_link.num[1],
11852                                   best[1]->hsp_link.xsum[1],
11853                                   search->context[search->first_context].
11854                                   query->effective_length,
11855                                   subject_length,
11856                                   search->dblen_eff,
11857                                   BlastGapDecayDivisor(gap_decay_rate,
11858                                                        best[1]->
11859                                                        hsp_link.num[1]));
11860             else
11861               prob[1] =
11862                 BlastLargeGapSumE( best[1]->hsp_link.num[1],
11863                                    best[1]->hsp_link.xsum[1],
11864                                    search->mult_queries->EffLengths[query_num],
11865                                    subject_length,
11866                                    search->mult_queries->
11867                                    DbLenEff[query_num],
11868                                    BlastGapDecayDivisor(gap_decay_rate,
11869                                                         best[1]->
11870                                                         hsp_link.num[1]));
11871 		    ordering_method = 1;
11872 		  }
11873 		}
11874 		else
11875 		{
11876 		    /* We only consider the case of big gaps. */
11877 		    best[1]->hsp_link.sum[1] += (best[1]->hsp_link.num[1])*cutoff[1];
11878 		    /* gap_prob=0 here as small gaps are NOT considered. */
11879 
11880 		    /* AM: Support for query concatenation. */
11881             if( !search->mult_queries )
11882               prob[1] =
11883                 BlastLargeGapSumE( best[1]->hsp_link.num[1],
11884                                    best[1]->hsp_link.xsum[1],
11885                                    search->context[search->first_context].
11886                                    query->effective_length,
11887                                    subject_length,
11888                                    search->dblen_eff,
11889                                    BlastGapDecayDivisor(gap_decay_rate,
11890                                                         best[1]->
11891                                                         hsp_link.num[1]));
11892             else
11893               prob[1] =
11894                 BlastLargeGapSumE( best[1]->hsp_link.num[1],
11895                                    best[1]->hsp_link.xsum[1],
11896                                    search->mult_queries->EffLengths[query_num],
11897                                    subject_length,
11898                                    search->mult_queries->
11899                                    DbLenEff[query_num],
11900                                    BlastGapDecayDivisor(gap_decay_rate,
11901                                                         best[1]->
11902                                                         hsp_link.num[1]));
11903 
11904             ordering_method = 1;
11905 		}
11906 
11907         best[ordering_method]->start_of_chain = TRUE;
11908         best[ordering_method]->evalue = prob[ordering_method];
11909 
11910 /* remove the links that have been ordered already. */
11911 		if (best[ordering_method]->hsp_link.link[ordering_method])
11912 		{
11913 			linked_set = TRUE;
11914 		}
11915 		else
11916 		{
11917 			linked_set = FALSE;
11918 		}
11919 		if (best[ordering_method]->linked_to>0) path_changed=1;
11920 		for (H=best[ordering_method]; H!=NULL;
11921 			H=H->hsp_link.link[ordering_method])
11922 		{
11923 		  if (H->linked_to>1) path_changed=1;
11924 		  H->linked_to=-1000;
11925 		  H->hsp_link.changed=1;
11926 			/* record whether this is part of a linked set. */
11927 			H->linked_set = linked_set;
11928 			if (ordering_method == 0)
11929 				H->ordering_method = BLAST_SMALL_GAPS;
11930 			else
11931 				H->ordering_method = BLAST_LARGE_GAPS;
11932 			H->evalue = prob[ordering_method];
11933 			if (H->next)
11934 				(H->next)->prev=H->prev;
11935 			if (H->prev)
11936 				(H->prev)->next=H->next;
11937 			number_of_hsps--;
11938 		}
11939 
11940 	     } /* end while num_hsps... */
11941 	} /* end for frame_index ... */
11942 
11943 
11944 	HeapSort(hsp_array,total_number_of_hsps,sizeof(BLAST_HSPPtr), rev_compare_hsps);
11945 /* Sort by starting position. */
11946 
11947 	HeapSort(hsp_array, total_number_of_hsps,sizeof(BLAST_HSPPtr), fwd_compare_hsps);
11948 
11949 	for (index=0, last_hsp=NULL;index<total_number_of_hsps; index++)
11950 	{
11951 		H = hsp_array[index];
11952 		H->prev = NULL;
11953 		H->next = NULL;
11954 	}
11955 
11956 /* hook up the HSP's. */
11957 	first_hsp = NULL;
11958 	for (index=0, last_hsp=NULL;index<total_number_of_hsps; index++)
11959 	{
11960 		H = hsp_array[index];
11961 
11962 /* If this is not a single piece or the start of a chain, then Skip it. */
11963 	     	if (H->linked_set == TRUE && H->start_of_chain == FALSE)
11964 			continue;
11965 
11966 /* If the HSP has no "link" connect the "next", otherwise follow the "link"
11967 chain down, connecting them with "next" and "prev". */
11968 		if (last_hsp == NULL)
11969 			first_hsp = H;
11970 		H->prev = last_hsp;
11971 		ordering_method = H->ordering_method;
11972 		if (H->hsp_link.link[ordering_method] == NULL)
11973 		{
11974 /* Grab the next HSP that is not part of a chain or the start of a chain */
11975 /* The "next" pointers are not hooked up yet in HSP's further down array. */
11976 		     index1=index;
11977 		     H2 = index1<(total_number_of_hsps-1) ? hsp_array[index1+1] : NULL;
11978 	     	     while (H2 && H2->linked_set == TRUE &&
11979 				H2->start_of_chain == FALSE)
11980 		     {
11981 			index1++;
11982 		     	H2 = index1<(total_number_of_hsps-1) ? hsp_array[index1+1] : NULL;
11983 		     }
11984 		     H->next= H2;
11985 		}
11986 		else
11987 		{
11988 			/* The first one has the number of links correct. */
11989 			num_links = H->hsp_link.num[ordering_method];
11990 			link = H->hsp_link.link[ordering_method];
11991 			while (link)
11992 			{
11993 				H->num = num_links;
11994 				H->xsum = H->hsp_link.xsum[ordering_method];
11995 				H->next = (BLAST_HSPPtr) link;
11996 				H->prev = last_hsp;
11997 				last_hsp = H;
11998 				H = H->next;
11999 				if (H != NULL)
12000 				    link = H->hsp_link.link[ordering_method];
12001 				else
12002 				    break;
12003 			}
12004 			/* Set these for last link in chain. */
12005 			H->num = num_links;
12006 			H->xsum = H->hsp_link.xsum[ordering_method];
12007 /* Grab the next HSP that is not part of a chain or the start of a chain */
12008 		     	index1=index;
12009 		     	H2 = index1<(total_number_of_hsps-1) ? hsp_array[index1+1] : NULL;
12010 	     	     	while (H2 && H2->linked_set == TRUE &&
12011 				H2->start_of_chain == FALSE)
12012 		     	{
12013 			    index1++;
12014 		     	    H2 = index1<(total_number_of_hsps-1) ? hsp_array[index1+1] : NULL;
12015 			}
12016 		     	H->next= H2;
12017 			H->prev = last_hsp;
12018 		}
12019 		last_hsp = H;
12020 	}
12021 
12022 	return first_hsp;
12023 }
12024 
12025 
12026 /*
12027 	Checks Hitlist's for an HSP (or set of HSP's) with the
12028 	minimum e-value.  Discards those that do not meet the
12029 	standard.
12030 */
12031 
12032 Int2 LIBCALL
BlastReapHitlistByEvalue(BlastSearchBlkPtr search)12033 BlastReapHitlistByEvalue (BlastSearchBlkPtr search)
12034 
12035 {
12036 	BLAST_HitListPtr hitlist;
12037 	BLAST_HSPPtr hsp;
12038 	BLAST_HSPPtr PNTR hsp_array;
12039 	Boolean hsp_deleted=FALSE;
12040 	Int4 hsp_cnt=0;
12041 	Int4 index;
12042 	Nlm_FloatHi cutoff;
12043 
12044 	if (search == NULL)
12045 		return 1;
12046 
12047         cutoff = search->pbp->cutoff_e;
12048 
12049         /* AM: Support for query concatenation. */
12050 	if( !search->mult_queries
12051 	    || search->prog_number != blast_type_tblastn
12052 	    || !search->mult_queries->use_mq )
12053 	  hitlist = search->current_hitlist;
12054         else
12055 	  hitlist = search->mult_queries->HitListArray[
12056 	    search->mult_queries->current_query];
12057 
12058 	if (hitlist)
12059 	{
12060 		hitlist->hspcnt_max = hitlist->hspcnt;
12061 		hsp_array = hitlist->hsp_array;
12062 		for (index=0; index<hitlist->hspcnt; index++)
12063 		{
12064 			hsp = hsp_array[index];
12065 			if (hsp->evalue > cutoff)
12066 			{
12067                             hsp_array[index] = BLAST_HSPFree(hsp_array[index]);
12068                             hsp_deleted = TRUE;
12069 			}
12070 			else
12071 			{
12072 				/*hsp->pvalue = BlastKarlinEtoP(hsp->evalue);*/
12073 				hsp_cnt++;
12074 			}
12075 		}
12076                 if (hsp_deleted == TRUE)
12077 		{
12078 			HspArrayPurge(hitlist->hsp_array, hitlist->hspcnt, FALSE);
12079 		}
12080 
12081 		hitlist->hspcnt = hsp_cnt;
12082 		hitlist->hspcnt_max = hitlist->hspcnt;
12083 		if (hitlist->hspcnt == 0)
12084 		{
12085 			BlastHitListPurge(hitlist);
12086 		}
12087 		else
12088 		{
12089         		NlmMutexLockEx(&search->thr_info->callback_mutex);
12090 			search->thr_info->number_of_pos_hits++;
12091             		NlmMutexUnlock(search->thr_info->callback_mutex);
12092 			search->number_of_seqs_better_E++;
12093 		}
12094 	}
12095 
12096 	/* AM: Support for query concatenation. */
12097 	if( !search->mult_queries
12098 	    || search->prog_number != blast_type_tblastn
12099 	    || !search->mult_queries->use_mq )
12100 	  search->current_hitlist = hitlist;
12101         else
12102 	  search->mult_queries->HitListArray[
12103 	    search->mult_queries->current_query] = hitlist;
12104 
12105 	return 0;
12106 }
12107 
12108 /*
12109 	Checks Hitlist's for an HSP (or set of HSP's) with the
12110 	minimum e-value.  Discards those that do not meet the
12111 	standard.
12112 */
12113 
12114 Int2 LIBCALL
BlastGetNonSumStatsEvalue(BlastSearchBlkPtr search)12115 BlastGetNonSumStatsEvalue (BlastSearchBlkPtr search)
12116 {
12117 	BLAST_HitListPtr hitlist;
12118 	BLAST_HSPPtr hsp;
12119 	BLAST_HSPPtr PNTR hsp_array;
12120 	BLAST_KarlinBlkPtr PNTR kbp;
12121 	Int4 hsp_cnt;
12122 	Int4 index;
12123 	/* AM: Added to support query concatencation. */
12124 	Int4 query_num;
12125 
12126 	if (search == NULL)
12127 		return 1;
12128 
12129 	if (search->pbp->gapped_calculation)
12130 	{
12131 		kbp = search->sbp->kbp_gap;
12132 	}
12133 	else
12134 	{
12135 		kbp = search->sbp->kbp;
12136 	}
12137 
12138 	hitlist = search->current_hitlist;
12139 	if (hitlist)
12140 	{
12141 		hsp_cnt = hitlist->hspcnt;
12142 		hsp_array = hitlist->hsp_array;
12143 		for (index=0; index<hsp_cnt; index++)
12144 		{
12145 			hsp = hsp_array[index];
12146                         if (!search->pbp->mb_params)
12147 			{
12148 			  /* AM: changed to support query concatenation. */
12149 			  if( !search->mult_queries )
12150                             hsp->evalue = BlastKarlinStoE_simple(hsp->score,
12151                                                                  kbp[hsp->context],
12152                                                                  search->searchsp_eff);
12153                           else
12154 			  {
12155 			    query_num = GetQueryNum( search->mult_queries,
12156 			                             hsp->query.offset,
12157 						     hsp->query.end,
12158 						     hsp->query.frame );
12159 	       	            hsp->evalue = BlastKarlinStoE_simple( hsp->score,
12160 		                                                  kbp[hsp->context],
12161 						                  search->mult_queries->SearchSpEff[query_num] );
12162 			  }
12163 			}
12164                         else {
12165                            FloatHi searchsp_eff;
12166                            hsp->context = BinarySearchInt4(hsp->query.offset,
12167                                                            search->query_context_offsets, (Int4) (search->last_context+1));
12168                            if (kbp[hsp->context]) {
12169                               searchsp_eff = (FloatHi) search->dblen_eff *
12170                               (FloatHi) search->context[hsp->context].query->effective_length;
12171                               hsp->evalue = BlastKarlinStoE_simple(hsp->score,
12172                                    kbp[hsp->context], searchsp_eff);
12173                            }
12174                         }
12175 		}
12176 	}
12177 	return 0;
12178 }
12179 
12180 Int2 LIBCALL
BlastTimeFillStructure(BlastTimeKeeperPtr btkp)12181 BlastTimeFillStructure(BlastTimeKeeperPtr btkp)
12182 
12183 {
12184 	CPUTimePtr	pTime;
12185 
12186 	if (btkp == NULL)
12187 	    return 1;
12188 
12189 	pTime = CPUTimeMeasure();
12190 	if (pTime == NULL)
12191 	    return 1;
12192 
12193 	btkp->user = (Nlm_FloatLo) CPUTimeGetUser(pTime);
12194 	btkp->system = (Nlm_FloatLo) CPUTimeGetSys(pTime);
12195 	btkp->total = btkp->user + btkp->system;
12196 
12197 	CPUTimeFree(pTime);
12198 
12199 	return 0;
12200 }
12201 
12202 /*
12203 	starts the awake thread using static variables in this file.
12204 */
12205 
12206 void
BlastStartAwakeThread(BlastThrInfoPtr thr_info)12207 BlastStartAwakeThread(BlastThrInfoPtr thr_info)
12208 {
12209     VoidPtr status=NULL;
12210 
12211     if (!thr_info->tick_callback)
12212        return;
12213     /* If awake_thr is running from the last search, then wait for the join. */
12214     /* This pointer is NULL on the first search ever. */
12215     if (thr_info->awake_thr) {
12216         NlmThreadJoin(thr_info->awake_thr, &status);
12217         thr_info->awake_thr = NULL;
12218     }
12219 
12220     if (NlmThreadsAvailable()) {
12221         thr_info->awake = TRUE;
12222         /* last tick is used by 'star_proc' */
12223         thr_info->awake_thr =
12224             NlmThreadCreate(star_proc, thr_info);
12225     }
12226 
12227     return;
12228 }
12229 
12230 /* Change the awake flag.  This thread will die in one second. */
12231 void
BlastStopAwakeThread(BlastThrInfoPtr thr_info)12232 BlastStopAwakeThread(BlastThrInfoPtr thr_info)
12233 {
12234     thr_info->awake = FALSE;
12235 
12236 }
12237