1 /* $Id: aa_ungapped.c 500404 2016-05-04 14:59:01Z camacho $
2  * ===========================================================================
3  *
4  *                            PUBLIC DOMAIN NOTICE
5  *               National Center for Biotechnology Information
6  *
7  *  This software/database is a "United States Government Work" under the
8  *  terms of the United States Copyright Act.  It was written as part of
9  *  the author's official duties as a United States Government employee and
10  *  thus cannot be copyrighted.  This software/database is freely available
11  *  to the public for use. The National Library of Medicine and the U.S.
12  *  Government have not placed any restriction on its use or reproduction.
13  *
14  *  Although all reasonable efforts have been taken to ensure the accuracy
15  *  and reliability of the software and data, the NLM and the U.S.
16  *  Government do not and cannot warrant the performance or results that
17  *  may be obtained by using this software or data. The NLM and the U.S.
18  *  Government disclaim all warranties, express or implied, including
19  *  warranties of performance, merchantability or fitness for any particular
20  *  purpose.
21  *
22  *  Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  */
26 
27 /** @file aa_ungapped.c
28  * Functions to iterate over seed hits and perform ungapped extensions.
29  */
30 
31 #include <algo/blast/core/aa_ungapped.h>
32 #include <algo/blast/core/blast_aalookup.h>
33 #include <algo/blast/core/blast_aascan.h>
34 #include <algo/blast/core/blast_util.h>
35 
36 /** Scan a subject sequence for word hits and trigger two-hit extensions.
37  *
38  * @param subject the subject sequence [in]
39  * @param query the query sequence [in]
40  * @param lookup_wrap the lookup table [in]
41  * @param ewp Structure containing the diagonal array
42  * @param matrix the substitution matrix [in]
43  * @param word_params structure containing per-context cutoff information [in]
44  * @param query_info structure containing context ranges [in]
45  * @param offset_pairs Array for storing query and subject offsets. [in]
46  * @param array_size the number of elements in each offset array [in]
47  * @param ungapped_hsps hsps resulting from the ungapped extension [out]
48  * @param ungapped_stats Various hit counts. Not filled if NULL [out]
49  */
50 static Int2 s_BlastAaWordFinder_TwoHit(const BLAST_SequenceBlk* subject,
51                                 const BLAST_SequenceBlk* query,
52                                 const LookupTableWrap* lookup_wrap,
53                                 Blast_ExtendWord * ewp,
54                                 Int4 ** matrix,
55                                 const BlastInitialWordParameters * word_params,
56                                 BlastQueryInfo * query_info,
57                                 BlastOffsetPair* NCBI_RESTRICT offset_pairs,
58                                 Int4 array_size,
59                                 BlastInitHitList* ungapped_hsps,
60                                 BlastUngappedStats* ungapped_stats);
61 
62 /** Scan a subject sequence for word hits and trigger two-hit extensions
63  * (specialized for RPS blast).
64  *
65  * @param subject the subject sequence [in]
66  * @param query the query sequence [in]
67  * @param lookup_wrap the lookup table [in]
68  * @param ewp Structure containing the diagonal array [in]
69  * @param matrix the substitution matrix [in]
70  * @param cutoff cutoff score for saving ungapped HSPs [in]
71  * @param dropoff x dropoff [in]
72  * @param ungapped_hsps hsps resulting from the ungapped extension [out]
73  * @param ungapped_stats Various hit counts. Not filled if NULL [out]
74  */
75 static Int2 s_BlastRPSWordFinder_TwoHit(const BLAST_SequenceBlk* subject,
76                                const BLAST_SequenceBlk* query,
77                                const LookupTableWrap* lookup_wrap,
78                                Blast_ExtendWord * ewp,
79                                Int4 ** matrix,
80                                Int4 cutoff,
81                                Int4 dropoff,
82                                BlastInitHitList* ungapped_hsps,
83                                BlastUngappedStats* ungapped_stats);
84 
85 /** Scan a subject sequence for word hits and trigger one-hit extensions.
86  *
87  * @param subject the subject sequence
88  * @param query the query sequence
89  * @param lookup_wrap the lookup table
90  * @param ewp Structure containing the diagonal array [in]
91  * @param matrix the substitution matrix [in]
92  * @param word_params structure containing per-context cutoff information [in]
93  * @param query_info structure containing context ranges [in]
94  * @param offset_pairs Array for storing query and subject offsets. [in]
95  * @param array_size the number of elements in each offset array
96  * @param ungapped_hsps hsps resulting from the ungapped extensions [out]
97  * @param ungapped_stats Various hit counts. Not filled if NULL [out]
98  */
99 static Int2 s_BlastAaWordFinder_OneHit(const BLAST_SequenceBlk* subject,
100                               const BLAST_SequenceBlk* query,
101                               const LookupTableWrap* lookup_wrap,
102                               Blast_ExtendWord * ewp,
103                               Int4 ** matrix,
104                               const BlastInitialWordParameters * word_params,
105                               BlastQueryInfo * query_info,
106                               BlastOffsetPair* NCBI_RESTRICT offset_pairs,
107                               Int4 array_size,
108                               BlastInitHitList* ungapped_hsps,
109                               BlastUngappedStats* ungapped_stats);
110 
111 /** Scan a subject sequence for word hits and trigger one-hit extensions
112  * (spcialized for RPS blast).
113  *
114  * @param subject the subject sequence
115  * @param query the query sequence
116  * @param lookup_wrap the lookup table
117  * @param ewp Structure containing the diagonal array [in]
118  * @param matrix the substitution matrix [in]
119  * @param cutoff cutoff score for saving ungapped HSPs [in]
120  * @param dropoff x dropoff [in]
121  * @param ungapped_hsps hsps resulting from the ungapped extensions [out]
122  * @param ungapped_stats Various hit counts. Not filled if NULL [out]
123  */
124 static Int2 s_BlastRPSWordFinder_OneHit(const BLAST_SequenceBlk* subject,
125                                const BLAST_SequenceBlk* query,
126                                const LookupTableWrap* lookup_wrap,
127                                Blast_ExtendWord * ewp,
128                                Int4 ** matrix,
129                                Int4 cutoff,
130                                Int4 dropoff,
131                                BlastInitHitList* ungapped_hsps,
132                                BlastUngappedStats* ungapped_stats);
133 
134 /** Perform a one-hit extension. Beginning at the specified hit,
135  * extend to the left, then extend to the right.
136  *
137  * @param matrix the substitution matrix [in]
138  * @param subject subject sequence [in]
139  * @param query query sequence [in]
140  * @param s_off subject offset [in]
141  * @param q_off query offset [in]
142  * @param dropoff X dropoff parameter [in]
143  * @param hsp_q the offset in the query where the HSP begins [out]
144  * @param hsp_s the offset in the subject where the HSP begins [out]
145  * @param hsp_len the length of the HSP [out]
146  * @param word_size number of letters in the initial word hit [in]
147  * @param use_pssm TRUE if the scoring matrix is position-specific [in]
148  * @param s_last_off the rightmost subject offset examined [out]
149  * @return the score of the hsp.
150  */
151 static Int4 s_BlastAaExtendOneHit(Int4 ** matrix,
152                          const BLAST_SequenceBlk* subject,
153                          const BLAST_SequenceBlk* query,
154                          Int4 s_off,
155                          Int4 q_off,
156                          Int4 dropoff,
157                          Int4* hsp_q,
158                          Int4* hsp_s,
159                          Int4* hsp_len,
160                          Int4 word_size,
161                          Boolean use_pssm,
162                          Int4* s_last_off);
163 
164 /** Perform a two-hit extension. Given two hits L and R, begin
165  * at R and extend to the left. If we do not reach L, abort the extension.
166  * Otherwise, begin at R and extend to the right.
167  *
168  * @param matrix the substitution matrix [in]
169  * @param subject subject sequence [in]
170  * @param query query sequence [in]
171  * @param s_left_off left subject offset [in]
172  * @param s_right_off right subject offset [in]
173  * @param q_right_off right query offset [in]
174  * @param dropoff X dropoff parameter [in]
175  * @param hsp_q the offset in the query where the HSP begins [out]
176  * @param hsp_s the offset in the subject where the HSP begins [out]
177  * @param hsp_len the length of the HSP [out]
178  * @param use_pssm TRUE if the scoring matrix is position-specific [in]
179  * @param word_size number of letters in one word [in]
180  * @param right_extend set to TRUE if an extension to the right happened [out]
181  * @param s_last_off the rightmost subject offset examined [out]
182  * @return the score of the hsp.
183  */
184 static Int4 s_BlastAaExtendTwoHit(Int4 ** matrix,
185                          const BLAST_SequenceBlk* subject,
186                          const BLAST_SequenceBlk* query,
187                          Int4 s_left_off,
188                          Int4 s_right_off,
189                          Int4 q_right_off,
190                          Int4 dropoff,
191                          Int4* hsp_q,
192                          Int4* hsp_s,
193                          Int4* hsp_len,
194                          Boolean use_pssm,
195                          Int4 word_size,
196                          Boolean *right_extend,
197                          Int4* s_last_off);
198 
199 
BlastAaWordFinder(BLAST_SequenceBlk * subject,BLAST_SequenceBlk * query,BlastQueryInfo * query_info,LookupTableWrap * lut_wrap,Int4 ** matrix,const BlastInitialWordParameters * word_params,Blast_ExtendWord * ewp,BlastOffsetPair * NCBI_RESTRICT offset_pairs,Int4 offset_array_size,BlastInitHitList * init_hitlist,BlastUngappedStats * ungapped_stats)200 Int2 BlastAaWordFinder(BLAST_SequenceBlk * subject,
201                        BLAST_SequenceBlk * query,
202                        BlastQueryInfo * query_info,
203                        LookupTableWrap * lut_wrap,
204                        Int4 ** matrix,
205                        const BlastInitialWordParameters * word_params,
206                        Blast_ExtendWord * ewp,
207                        BlastOffsetPair * NCBI_RESTRICT offset_pairs,
208                        Int4 offset_array_size,
209                        BlastInitHitList * init_hitlist,
210                        BlastUngappedStats * ungapped_stats)
211 {
212     Int2 status = 0;
213 
214     if (ewp->diag_table->multiple_hits) {
215         status = s_BlastAaWordFinder_TwoHit(subject, query,
216                                             lut_wrap, ewp,
217                                             matrix,
218                                             word_params,
219                                             query_info,
220                                             offset_pairs,
221                                             offset_array_size,
222                                             init_hitlist, ungapped_stats);
223     } else {
224         status = s_BlastAaWordFinder_OneHit(subject, query,
225                                             lut_wrap, ewp,
226                                             matrix,
227                                             word_params,
228                                             query_info,
229                                             offset_pairs,
230                                             offset_array_size,
231                                             init_hitlist, ungapped_stats);
232     }
233 
234     Blast_InitHitListSortByScore(init_hitlist);
235     return status;
236 }
237 
BlastRPSWordFinder(BLAST_SequenceBlk * subject,BLAST_SequenceBlk * query,BlastQueryInfo * query_info,LookupTableWrap * lut_wrap,Int4 ** matrix,const BlastInitialWordParameters * word_params,Blast_ExtendWord * ewp,BlastOffsetPair * NCBI_RESTRICT offset_pairs,Int4 offset_array_size,BlastInitHitList * init_hitlist,BlastUngappedStats * ungapped_stats)238 Int2 BlastRPSWordFinder(BLAST_SequenceBlk * subject,
239                         BLAST_SequenceBlk * query,
240                         BlastQueryInfo * query_info,
241                         LookupTableWrap * lut_wrap,
242                         Int4 ** matrix,
243                         const BlastInitialWordParameters * word_params,
244                         Blast_ExtendWord * ewp,
245                         BlastOffsetPair * NCBI_RESTRICT offset_pairs,
246                         Int4 offset_array_size,
247                         BlastInitHitList * init_hitlist,
248                         BlastUngappedStats * ungapped_stats)
249 {
250     Int2 status = 0;
251     BlastUngappedCutoffs *cutoffs;
252     Int4 context = subject->oid;
253 
254     /* for rpsblast, query contains the concatenated database
255        and subject contains one query sequence (or one frame
256        of a translated nucleotide sequence). This means the cutoff
257        and X-drop value is the same for all hits */
258 
259     if (subject->frame != 0) {
260         context = subject->oid * NUM_FRAMES +
261                         BLAST_FrameToContext(subject->frame,
262                                              eBlastTypeRpsTblastn);
263     }
264     cutoffs = word_params->cutoffs + context;
265 
266     if (ewp->diag_table->multiple_hits) {
267         status = s_BlastRPSWordFinder_TwoHit(subject, query,
268                                              lut_wrap, ewp,
269                                              matrix,
270                                              cutoffs->cutoff_score,
271                                              cutoffs->x_dropoff,
272                                              init_hitlist, ungapped_stats);
273     } else {
274         status = s_BlastRPSWordFinder_OneHit(subject, query,
275                                              lut_wrap, ewp,
276                                              matrix,
277                                              cutoffs->cutoff_score,
278                                              cutoffs->x_dropoff,
279                                              init_hitlist, ungapped_stats);
280     }
281 
282     Blast_InitHitListSortByScore(init_hitlist);
283     return status;
284 }
285 
286 static Int2
s_BlastRPSWordFinder_TwoHit(const BLAST_SequenceBlk * subject,const BLAST_SequenceBlk * query,const LookupTableWrap * lookup_wrap,Blast_ExtendWord * ewp,Int4 ** matrix,Int4 cutoff,Int4 dropoff,BlastInitHitList * ungapped_hsps,BlastUngappedStats * ungapped_stats)287 s_BlastRPSWordFinder_TwoHit(const BLAST_SequenceBlk * subject,
288                             const BLAST_SequenceBlk * query,
289                             const LookupTableWrap * lookup_wrap,
290                             Blast_ExtendWord * ewp,
291                             Int4 ** matrix,
292                             Int4 cutoff,
293                             Int4 dropoff,
294                             BlastInitHitList * ungapped_hsps,
295                             BlastUngappedStats * ungapped_stats)
296 {
297     BlastRPSLookupTable *lookup;
298     Int4 wordsize;
299     Int4 i, j;
300     Int4 hits = 0;
301     Int4 totalhits = 0;
302     Int4 first_offset = 0;
303     Int4 last_offset;
304     Int4 score;
305     Int4 hsp_q, hsp_s, hsp_len;
306     Int4 window;
307     Int4 last_hit, s_last_off, diff;
308     Int4 diag_offset, diag_coord, diag_mask;
309     DiagStruct *diag_array;
310     Boolean right_extend;
311     Int4 hits_extended = 0;
312     BLAST_DiagTable * diag = ewp->diag_table;
313 
314     ASSERT(diag != NULL);
315 
316     diag_offset = diag->offset;
317     diag_array = diag->hit_level_array;
318 
319     ASSERT(diag_array);
320 
321     diag_mask = diag->diag_mask;
322     window = diag->window;
323 
324     lookup = (BlastRPSLookupTable *) lookup_wrap->lut;
325     wordsize = lookup->wordsize;
326     last_offset = subject->length - wordsize;
327 
328     while (first_offset <= last_offset) {
329         /* scan the subject sequence for hits */
330         hits = BlastRPSScanSubject(lookup_wrap, subject, &first_offset);
331 
332         totalhits += hits;
333         /* for each region of the concatenated database */
334         for (i = 0; i < lookup->num_buckets; ++i) {
335             RPSBucket *curr_bucket = lookup->bucket_array + i;
336             BlastOffsetPair *offset_pairs = curr_bucket->offset_pairs;
337             hits = curr_bucket->num_filled;
338 
339             /* handle each hit in the neighborhood. Because the neighborhood
340                is small and there should be many hits there, proceeding one
341                bucket at a time provides maximum reuse of data structures. A
342                side benefit is that ungapped extensions also cluster together
343                in the concatenated database, reusing PSSM data as well. */
344             for (j = 0; j < hits; ++j) {
345                 Uint4 query_offset = offset_pairs[j].qs_offsets.q_off;
346                 Uint4 subject_offset = offset_pairs[j].qs_offsets.s_off;
347 
348                 /* calculate the diagonal associated with this query-subject
349                    pair */
350 
351                 diag_coord = (query_offset - subject_offset) & diag_mask;
352 
353                 /* If the reset bit is set, an extension just happened. */
354                 if (diag_array[diag_coord].flag) {
355                     /* If we've already extended past this hit, skip it. */
356                     if ((Int4) (subject_offset + diag_offset) <
357                         diag_array[diag_coord].last_hit) {
358                         continue;
359                     }
360                     /* Otherwise, start a new hit. */
361                     else {
362                         diag_array[diag_coord].last_hit =
363                             subject_offset + diag_offset;
364                         diag_array[diag_coord].flag = 0;
365                     }
366                 }
367                 /* If the reset bit is cleared, try to start an extension. */
368                 else {
369                     /* find the distance to the last hit on this diagonal */
370                     last_hit = diag_array[diag_coord].last_hit - diag_offset;
371                     diff = subject_offset - last_hit;
372 
373                     if (diff >= window) {
374                         /* We are beyond the window for this diagonal; start
375                            a new hit */
376                         diag_array[diag_coord].last_hit =
377                             subject_offset + diag_offset;
378                         continue;
379                     }
380 
381                     /* If the difference is less than the wordsize (i.e.
382                        last hit and this hit overlap), give up */
383 
384                     if (diff < wordsize) {
385                         continue;
386                     }
387 
388                     /* Extend this pair of hits. The extension to the left
389                        must reach the end of the first word in order for
390                        extension to the right to proceed. */
391 
392                     score = s_BlastAaExtendTwoHit(matrix, subject, query,
393                                                   last_hit + wordsize,
394                                                   subject_offset, query_offset,
395                                                   dropoff, &hsp_q, &hsp_s,
396                                                   &hsp_len, TRUE,
397                                                   wordsize, &right_extend,
398                                                   &s_last_off);
399 
400                     ++hits_extended;
401 
402                     /* if the hsp meets the score threshold, report it */
403                     if (score >= cutoff)
404                         BlastSaveInitHsp(ungapped_hsps, hsp_q, hsp_s,
405                                          query_offset, subject_offset,
406                                          hsp_len, score);
407 
408                     /* If an extension to the right happened, reset the last
409                        hit so that future hits to this diagonal must start
410                        over. */
411 
412                     if (right_extend) {
413                         diag_array[diag_coord].flag = 1;
414                         diag_array[diag_coord].last_hit =
415                             s_last_off - (wordsize - 1) + diag_offset;
416                     }
417                     /* Otherwise, make the present hit into the previous hit
418                        for this diagonal */
419                     else {
420                         diag_array[diag_coord].last_hit =
421                             subject_offset + diag_offset;
422                     }
423                 }               /* end else */
424 
425             }                   /* end for - done with this batch of hits */
426         }                       /* end for - done with this bucket, call
427                                    scansubject again */
428     }                           /* end while - done with the entire sequence.
429                                  */
430 
431     /* increment the offset in the diagonal array */
432     Blast_ExtendWordExit(ewp, subject->length);
433 
434     Blast_UngappedStatsUpdate(ungapped_stats, totalhits, hits_extended,
435                               ungapped_hsps->total);
436     return 0;
437 }
438 
439 static Int2
s_BlastAaWordFinder_TwoHit(const BLAST_SequenceBlk * subject,const BLAST_SequenceBlk * query,const LookupTableWrap * lookup_wrap,Blast_ExtendWord * ewp,Int4 ** matrix,const BlastInitialWordParameters * word_params,BlastQueryInfo * query_info,BlastOffsetPair * NCBI_RESTRICT offset_pairs,Int4 array_size,BlastInitHitList * ungapped_hsps,BlastUngappedStats * ungapped_stats)440 s_BlastAaWordFinder_TwoHit(const BLAST_SequenceBlk * subject,
441                            const BLAST_SequenceBlk * query,
442                            const LookupTableWrap * lookup_wrap,
443                            Blast_ExtendWord * ewp,
444                            Int4 ** matrix,
445                            const BlastInitialWordParameters * word_params,
446                            BlastQueryInfo * query_info,
447                            BlastOffsetPair * NCBI_RESTRICT offset_pairs,
448                            Int4 array_size,
449                            BlastInitHitList * ungapped_hsps,
450                            BlastUngappedStats * ungapped_stats)
451 {
452     Boolean use_pssm = FALSE;
453     Int4 wordsize;
454     Int4 i;
455     Int4 hits = 0;
456     Int4 totalhits = 0;
457     Int4 score;
458     Int4 hsp_q, hsp_s, hsp_len = 0;
459     Int4 window;
460     Int4 last_hit, s_last_off, diff;
461     Int4 diag_offset, diag_coord, diag_mask;
462     DiagStruct *diag_array;
463     Boolean right_extend;
464     Int4 hits_extended = 0;
465     Int4 curr_context;
466     BlastUngappedCutoffs *cutoffs;
467     BLAST_DiagTable * diag = ewp->diag_table;
468     TAaScanSubjectFunction scansub;
469     Int4 scan_range[3];
470 
471     ASSERT(diag != NULL);
472 
473     diag_offset = diag->offset;
474     diag_array = diag->hit_level_array;
475 
476     ASSERT(diag_array);
477 
478     diag_mask = diag->diag_mask;
479     window = diag->window;
480 
481     if (lookup_wrap->lut_type == eAaLookupTable) {
482         BlastAaLookupTable *lookup =
483                         (BlastAaLookupTable *)(lookup_wrap->lut);
484         scansub = (TAaScanSubjectFunction)(lookup->scansub_callback);
485         wordsize = lookup->word_length;
486         use_pssm = lookup->use_pssm;
487     }
488     else {
489         BlastCompressedAaLookupTable *lookup =
490                         (BlastCompressedAaLookupTable *)(lookup_wrap->lut);
491         scansub = (TAaScanSubjectFunction)(lookup->scansub_callback);
492         wordsize = lookup->word_length;
493     }
494 
495     scan_range[0] = 0;
496     scan_range[1] = subject->seq_ranges[0].left;
497     scan_range[2] = subject->seq_ranges[0].right - wordsize;
498 
499     if (scan_range[2] < scan_range[1])
500         scan_range[2] = scan_range[1];
501 
502     while (scan_range[1] <= scan_range[2]) {
503         /* scan the subject sequence for hits */
504         hits = scansub(lookup_wrap, subject,
505                                   offset_pairs, array_size, scan_range);
506 
507         totalhits += hits;
508         /* for each hit, */
509         for (i = 0; i < hits; ++i) {
510             Uint4 query_offset = offset_pairs[i].qs_offsets.q_off;
511             Uint4 subject_offset = offset_pairs[i].qs_offsets.s_off;
512 
513             /* calculate the diagonal associated with this query-subject pair
514              */
515 
516             diag_coord = (query_offset - subject_offset) & diag_mask;
517 
518             /* If the reset bit is set, an extension just happened. */
519             if (diag_array[diag_coord].flag) {
520                 /* If we've already extended past this hit, skip it. */
521                 if ((Int4) (subject_offset + diag_offset) <
522                     diag_array[diag_coord].last_hit) {
523                     continue;
524                 }
525                 /* Otherwise, start a new hit. */
526                 else {
527                     diag_array[diag_coord].last_hit =
528                         subject_offset + diag_offset;
529                     diag_array[diag_coord].flag = 0;
530                 }
531             }
532             /* If the reset bit is cleared, try to start an extension. */
533             else {
534                 /* find the distance to the last hit on this diagonal */
535                 last_hit = diag_array[diag_coord].last_hit - diag_offset;
536                 diff = subject_offset - last_hit;
537 
538                 if (diff >= window) {
539                     /* We are beyond the window for this diagonal; start a
540                        new hit */
541                     diag_array[diag_coord].last_hit =
542                         subject_offset + diag_offset;
543                     continue;
544                 }
545 
546                 /* If the difference is less than the wordsize (i.e. last
547                    hit and this hit overlap), give up */
548 
549                 if (diff < wordsize) {
550                     continue;
551                 }
552 
553                 /* Extend this pair of hits. The extension to the left must
554                    reach the end of the first word in order for extension to
555                    the right to proceed.
556 
557                    To use the cutoff and X-drop values appropriate for this
558                    extension, the query context must first be found */
559 
560                 curr_context = BSearchContextInfo(query_offset, query_info);
561 
562                 /* Check if the last hit hits current query. Because last_hit
563                    is never reset, it may contain a hit to the previous
564                    concatenated query */
565 
566                 if (query_offset - diff <
567                     query_info->contexts[curr_context].query_offset) {
568 
569                     /* there was no last hit for this diagnol; start a new hit */
570                     diag_array[diag_coord].last_hit =
571                         subject_offset + diag_offset;
572                     continue;
573                 }
574 
575                 cutoffs = word_params->cutoffs + curr_context;
576                 score = s_BlastAaExtendTwoHit(matrix, subject, query,
577                                               last_hit + wordsize,
578                                               subject_offset, query_offset,
579                                               cutoffs->x_dropoff,
580                                               &hsp_q, &hsp_s,
581                                               &hsp_len, use_pssm,
582                                               wordsize, &right_extend,
583                                               &s_last_off);
584 
585                 ++hits_extended;
586 
587                 /* if the hsp meets the score threshold, report it */
588                 if (score >= cutoffs->cutoff_score)
589                     BlastSaveInitHsp(ungapped_hsps, hsp_q, hsp_s,
590                                      query_offset, subject_offset, hsp_len,
591                                      score);
592 
593                 /* If an extension to the right happened, reset the last hit
594                    so that future hits to this diagonal must start over. */
595 
596                 if (right_extend) {
597                     diag_array[diag_coord].flag = 1;
598                     diag_array[diag_coord].last_hit =
599                         s_last_off - (wordsize - 1) + diag_offset;
600                 }
601                 /* Otherwise, make the present hit into the previous hit for
602                    this diagonal */
603                 else {
604                     diag_array[diag_coord].last_hit =
605                         subject_offset + diag_offset;
606                 }
607             }                   /* end else */
608         }                       /* end for - done with this batch of hits,
609                                    call scansubject again. */
610     }                           /* end while - done with the entire sequence.
611                                  */
612 
613     /* increment the offset in the diagonal array */
614     Blast_ExtendWordExit(ewp, subject->length);
615 
616     Blast_UngappedStatsUpdate(ungapped_stats, totalhits, hits_extended,
617                               ungapped_hsps->total);
618     return 0;
619 }
620 
621 static Int2
s_BlastRPSWordFinder_OneHit(const BLAST_SequenceBlk * subject,const BLAST_SequenceBlk * query,const LookupTableWrap * lookup_wrap,Blast_ExtendWord * ewp,Int4 ** matrix,Int4 cutoff,Int4 dropoff,BlastInitHitList * ungapped_hsps,BlastUngappedStats * ungapped_stats)622 s_BlastRPSWordFinder_OneHit(const BLAST_SequenceBlk * subject,
623                             const BLAST_SequenceBlk * query,
624                             const LookupTableWrap * lookup_wrap,
625                             Blast_ExtendWord * ewp,
626                             Int4 ** matrix,
627                             Int4 cutoff,
628                             Int4 dropoff,
629                             BlastInitHitList * ungapped_hsps,
630                             BlastUngappedStats * ungapped_stats)
631 {
632     BlastRPSLookupTable *lookup = NULL;
633     Int4 wordsize;
634     Int4 hits = 0;
635     Int4 totalhits = 0;
636     Int4 first_offset = 0;
637     Int4 last_offset;
638     Int4 hsp_q, hsp_s, hsp_len;
639     Int4 s_last_off;
640     Int4 i, j;
641     Int4 score;
642     Int4 diag_offset, diag_coord, diag_mask, diff;
643     DiagStruct *diag_array;
644     Int4 hits_extended = 0;
645     BLAST_DiagTable * diag = ewp->diag_table;
646 
647     ASSERT(diag != NULL);
648 
649     diag_offset = diag->offset;
650     diag_array = diag->hit_level_array;
651     ASSERT(diag_array);
652 
653     diag_mask = diag->diag_mask;
654 
655     lookup = (BlastRPSLookupTable *) lookup_wrap->lut;
656     wordsize = lookup->wordsize;
657     last_offset = subject->length - wordsize;
658 
659     while (first_offset <= last_offset) {
660         /* scan the subject sequence for hits */
661         hits = BlastRPSScanSubject(lookup_wrap, subject, &first_offset);
662 
663         totalhits += hits;
664         for (i = 0; i < lookup->num_buckets; i++) {
665             RPSBucket *curr_bucket = lookup->bucket_array + i;
666             BlastOffsetPair *offset_pairs = curr_bucket->offset_pairs;
667             hits = curr_bucket->num_filled;
668 
669             /* handle each hit in the neighborhood. Because the neighborhood
670                is small and there should be many hits there, proceeding one
671                bucket at a time provides maximum reuse of data structures. A
672                side benefit is that ungapped extensions also cluster together
673                in the concatenated database, reusing PSSM data as well. */
674             for (j = 0; j < hits; ++j) {
675                 Uint4 query_offset = offset_pairs[j].qs_offsets.q_off;
676                 Uint4 subject_offset = offset_pairs[j].qs_offsets.s_off;
677                 diag_coord = (subject_offset - query_offset) & diag_mask;
678                 diff = subject_offset -
679                     (diag_array[diag_coord].last_hit - diag_offset);
680 
681                 /* do an extension, but only if we have not already extended
682                    this far */
683                 if (diff >= 0) {
684                     ++hits_extended;
685                     score = s_BlastAaExtendOneHit(matrix, subject, query,
686                                                   subject_offset, query_offset,
687                                                   dropoff, &hsp_q, &hsp_s,
688                                                   &hsp_len, wordsize, TRUE,
689                                                   &s_last_off);
690 
691                     /* if the hsp meets the score threshold, report it */
692                     if (score >= cutoff) {
693                         BlastSaveInitHsp(ungapped_hsps, hsp_q, hsp_s,
694                                          query_offset, subject_offset,
695                                          hsp_len, score);
696                     }
697                     diag_array[diag_coord].last_hit =
698                         s_last_off - (wordsize - 1) + diag_offset;
699                 }
700             }                   /* end for (one bucket) */
701         }                       /* end for (all buckets) */
702     }                           /* end while */
703 
704     /* increment the offset in the diagonal array (no windows used) */
705     Blast_ExtendWordExit(ewp, subject->length);
706 
707     Blast_UngappedStatsUpdate(ungapped_stats, totalhits, hits_extended,
708                               ungapped_hsps->total);
709     return 0;
710 }
711 
712 static Int2
s_BlastAaWordFinder_OneHit(const BLAST_SequenceBlk * subject,const BLAST_SequenceBlk * query,const LookupTableWrap * lookup_wrap,Blast_ExtendWord * ewp,Int4 ** matrix,const BlastInitialWordParameters * word_params,BlastQueryInfo * query_info,BlastOffsetPair * NCBI_RESTRICT offset_pairs,Int4 array_size,BlastInitHitList * ungapped_hsps,BlastUngappedStats * ungapped_stats)713 s_BlastAaWordFinder_OneHit(const BLAST_SequenceBlk * subject,
714                            const BLAST_SequenceBlk * query,
715                            const LookupTableWrap * lookup_wrap,
716                            Blast_ExtendWord * ewp,
717                            Int4 ** matrix,
718                            const BlastInitialWordParameters * word_params,
719                            BlastQueryInfo * query_info,
720                            BlastOffsetPair * NCBI_RESTRICT offset_pairs,
721                            Int4 array_size,
722                            BlastInitHitList * ungapped_hsps,
723                            BlastUngappedStats * ungapped_stats)
724 {
725     Boolean use_pssm = FALSE;
726     Int4 wordsize;
727     Int4 hits = 0;
728     Int4 totalhits = 0;
729     Int4 hsp_q, hsp_s, hsp_len;
730     Int4 s_last_off;
731     Int4 i;
732     Int4 score;
733     Int4 diag_offset, diag_coord, diag_mask, diff;
734     DiagStruct *diag_array;
735     Int4 hits_extended = 0;
736     BLAST_DiagTable * diag = ewp->diag_table;
737     TAaScanSubjectFunction scansub;
738     Int4 scan_range[3];
739 
740     ASSERT(diag != NULL);
741 
742     diag_offset = diag->offset;
743     diag_array = diag->hit_level_array;
744     ASSERT(diag_array);
745 
746     diag_mask = diag->diag_mask;
747 
748     if (lookup_wrap->lut_type == eAaLookupTable) {
749         BlastAaLookupTable *lookup =
750                         (BlastAaLookupTable *)(lookup_wrap->lut);
751         scansub = (TAaScanSubjectFunction)(lookup->scansub_callback);
752         wordsize = lookup->word_length;
753         use_pssm = lookup->use_pssm;
754     }
755     else {
756         BlastCompressedAaLookupTable *lookup =
757                         (BlastCompressedAaLookupTable *)(lookup_wrap->lut);
758         scansub = (TAaScanSubjectFunction)(lookup->scansub_callback);
759         wordsize = lookup->word_length;
760     }
761 
762     scan_range[0] = 0;
763     scan_range[1] = subject->seq_ranges[0].left;
764     scan_range[2] = subject->seq_ranges[0].right - wordsize;
765 
766     while (scan_range[1] <= scan_range[2]) {
767         /* scan the subject sequence for hits */
768         hits = scansub(lookup_wrap, subject,
769                        offset_pairs, array_size, scan_range);
770 
771         totalhits += hits;
772         /* for each hit, */
773         for (i = 0; i < hits; ++i) {
774             Uint4 query_offset = offset_pairs[i].qs_offsets.q_off;
775             Uint4 subject_offset = offset_pairs[i].qs_offsets.s_off;
776             diag_coord = (subject_offset - query_offset) & diag_mask;
777             diff = subject_offset -
778                 (diag_array[diag_coord].last_hit - diag_offset);
779 
780             /* do an extension, but only if we have not already extended this
781                far */
782             if (diff >= 0) {
783                 Int4 curr_context = BSearchContextInfo(query_offset,
784                                                        query_info);
785                 BlastUngappedCutoffs *cutoffs = word_params->cutoffs +
786                                                         curr_context;
787                 score = s_BlastAaExtendOneHit(matrix, subject, query,
788                                               subject_offset, query_offset,
789                                               cutoffs->x_dropoff,
790                                               &hsp_q, &hsp_s, &hsp_len,
791                                               wordsize, use_pssm, &s_last_off);
792 
793                 /* if the hsp meets the score threshold, report it */
794                 if (score >= cutoffs->cutoff_score) {
795                     BlastSaveInitHsp(ungapped_hsps, hsp_q, hsp_s,
796                                      query_offset, subject_offset, hsp_len,
797                                      score);
798                 }
799                 diag_array[diag_coord].last_hit =
800                     s_last_off - (wordsize - 1) + diag_offset;
801                 ++hits_extended;
802             }
803         }                       /* end for */
804     }                           /* end while */
805 
806     /* increment the offset in the diagonal array (no windows used) */
807     Blast_ExtendWordExit(ewp, subject->length);
808 
809     Blast_UngappedStatsUpdate(ungapped_stats, totalhits, hits_extended,
810                               ungapped_hsps->total);
811     return 0;
812 }
813 
814 /**
815  * Beginning at s_off and q_off in the subject and query, respectively,
816  * extend to the right until the cumulative score becomes negative or
817  * drops by at least 'dropoff', or the end of at least one sequence
818  * is reached.
819  *
820  * @param matrix the substitution matrix [in]
821  * @param subject subject sequence [in]
822  * @param query query sequence [in]
823  * @param s_off subject offset [in]
824  * @param q_off query offset [in]
825  * @param dropoff the X dropoff parameter [in]
826  * @param length the length of the computed extension [out]
827  * @param maxscore the score derived from a previous left extension [in]
828  * @param s_last_off the rightmost subject offset examined [out]
829  * @return The score of the extension
830  */
s_BlastAaExtendRight(Int4 ** matrix,const BLAST_SequenceBlk * subject,const BLAST_SequenceBlk * query,Int4 s_off,Int4 q_off,Int4 dropoff,Int4 * length,Int4 maxscore,Int4 * s_last_off)831 static Int4 s_BlastAaExtendRight(Int4 ** matrix,
832                         const BLAST_SequenceBlk * subject,
833                         const BLAST_SequenceBlk * query,
834                         Int4 s_off, Int4 q_off, Int4 dropoff,
835                         Int4 * length, Int4 maxscore, Int4 * s_last_off)
836 {
837     Int4 i, n, best_i = -1;
838     Int4 score = maxscore;
839 
840     Uint1 *s, *q;
841     n = MIN(subject->length - s_off, query->length - q_off);
842 
843     s = subject->sequence + s_off;
844     q = query->sequence + q_off;
845 
846     for (i = 0; i < n; i++) {
847         score += matrix[q[i]][s[i]];
848 
849         if (score > maxscore) {
850             maxscore = score;
851             best_i = i;
852         }
853 
854         /* The comparison below is really >= and is different than the old
855            code (e.g., blast.c:BlastWordExtend_prelim). In the old code the
856            loop continued as long as sum > X (X being negative).  The loop
857            control here is different and we *break out* when the if statement
858            below is true. */
859         if (score <= 0 || (maxscore - score) >= dropoff)
860             break;
861     }
862 
863     *length = best_i + 1;
864     *s_last_off = s_off + i;
865     return maxscore;
866 }
867 
868 
869 /**
870  * Beginning at s_off and q_off in the subject and query, respectively,
871  * extend to the left until the cumulative score drops by at least
872  * 'dropoff', or the end of at least one sequence is reached.
873  *
874  * @param matrix the substitution matrix [in]
875  * @param subject subject sequence [in]
876  * @param query query sequence [in]
877  * @param s_off subject offset [in]
878  * @param q_off query offset [in]
879  * @param dropoff the X dropoff parameter [in]
880  * @param length the length of the computed extension [out]
881  * @param maxscore the best running score from a previous extension;
882  *                 the running score of the current extension must exceed
883  *                 this value if the extension is to be of nonzero size [in]
884  * @return The score of the extension
885  */
s_BlastAaExtendLeft(Int4 ** matrix,const BLAST_SequenceBlk * subject,const BLAST_SequenceBlk * query,Int4 s_off,Int4 q_off,Int4 dropoff,Int4 * length,Int4 maxscore)886 static Int4 s_BlastAaExtendLeft(Int4 ** matrix,
887                        const BLAST_SequenceBlk * subject,
888                        const BLAST_SequenceBlk * query,
889                        Int4 s_off, Int4 q_off, Int4 dropoff,
890                        Int4 * length, Int4 maxscore)
891 {
892     Int4 i, n, best_i;
893     Int4 score = maxscore;
894 
895     Uint1 *s, *q;
896 
897     n = MIN(s_off, q_off);
898     best_i = n + 1;
899 
900     s = subject->sequence + s_off - n;
901     q = query->sequence + q_off - n;
902 
903     for (i = n; i >= 0; i--) {
904         score += matrix[q[i]][s[i]];
905 
906         if (score > maxscore) {
907             maxscore = score;
908             best_i = i;
909         }
910         /* The comparison below is really >= and is different than the old
911            code (e.g., blast.c:BlastWordExtend_prelim). In the old code the
912            loop continued as long as sum > X (X being negative).  The loop
913            control here is different and we *break out* when the if statement
914            below is true. */
915         if ((maxscore - score) >= dropoff)
916             break;
917     }
918 
919     *length = n - best_i + 1;
920     return maxscore;
921 }
922 
923 /**
924  * Identical to BlastAaExtendRight, except the score matrix
925  * is position-specific
926  *
927  * @param matrix the substitution matrix representing query sequence [in]
928  * @param subject subject sequence [in]
929  * @param query_size number of rows in the scoring matrix [in]
930  * @param s_off subject offset [in]
931  * @param q_off query offset [in]
932  * @param dropoff the X dropoff parameter [in]
933  * @param length the length of the extension [out]
934  * @param maxscore the score derived from a previous left extension [in]
935  * @param s_last_off the rightmost subject offset examined [out]
936  * @return The score of the extension
937  */
s_BlastPSSMExtendRight(Int4 ** matrix,const BLAST_SequenceBlk * subject,Int4 query_size,Int4 s_off,Int4 q_off,Int4 dropoff,Int4 * length,Int4 maxscore,Int4 * s_last_off)938 static Int4 s_BlastPSSMExtendRight(Int4 ** matrix,
939                           const BLAST_SequenceBlk * subject,
940                           Int4 query_size, Int4 s_off,
941                           Int4 q_off, Int4 dropoff,
942                           Int4 * length, Int4 maxscore, Int4 * s_last_off)
943 {
944     Int4 i, n, best_i = -1;
945     Int4 score = maxscore;
946     Uint1 *s;
947 
948     n = MIN(subject->length - s_off, query_size - q_off);
949     s = subject->sequence + s_off;
950 
951     for (i = 0; i < n; i++) {
952         score += matrix[q_off + i][s[i]];
953 
954         if (score > maxscore) {
955             maxscore = score;
956             best_i = i;
957         }
958 
959         /* The comparison below is really >= and is different than the old
960            code (e.g., blast.c:BlastWordExtend_prelim). In the old code the
961            loop continued as long as sum > X (X being negative).  The loop
962            control here is different and we *break out* when the if statement
963            below is true. */
964         if (score <= 0 || (maxscore - score) >= dropoff)
965             break;
966     }
967 
968     *length = best_i + 1;
969     *s_last_off = s_off + i;
970     return maxscore;
971 }
972 
973 /**
974  * Identical to BlastAaExtendLeft, except the query is
975  * represented by a position-specific score matrix.
976  *
977  * @param matrix the substitution matrix representing the query [in]
978  * @param subject subject sequence [in]
979  * @param s_off subject offset [in]
980  * @param q_off query offset [in]
981  * @param dropoff the X dropoff parameter [in]
982  * @param length the length of the extension [out]
983  * @param maxscore the score so far (probably from initial word hit) [in]
984  * @return The score of the extension
985  */
s_BlastPSSMExtendLeft(Int4 ** matrix,const BLAST_SequenceBlk * subject,Int4 s_off,Int4 q_off,Int4 dropoff,Int4 * length,Int4 maxscore)986 static Int4 s_BlastPSSMExtendLeft(Int4 ** matrix,
987                          const BLAST_SequenceBlk * subject,
988                          Int4 s_off, Int4 q_off,
989                          Int4 dropoff, Int4 * length, Int4 maxscore)
990 {
991     Int4 i, n, best_i;
992     Int4 score = maxscore;
993     Uint1 *s;
994 
995     n = MIN(s_off, q_off);
996     best_i = n + 1;
997     s = subject->sequence + s_off - n;
998 
999     for (i = n; i >= 0; i--) {
1000         score += matrix[q_off - n + i][s[i]];
1001 
1002         if (score > maxscore) {
1003             maxscore = score;
1004             best_i = i;
1005         }
1006         /* The comparison below is really >= and is different than the old
1007            code (e.g., blast.c:BlastWordExtend_prelim). In the old code the
1008            loop continued as long as sum > X (X being negative).  The loop
1009            control here is different and we *break out* when the if statement
1010            below is true. */
1011         if ((maxscore - score) >= dropoff)
1012             break;
1013     }
1014 
1015     *length = n - best_i + 1;
1016     return maxscore;
1017 }
1018 
1019 static Int4
s_BlastAaExtendOneHit(Int4 ** matrix,const BLAST_SequenceBlk * subject,const BLAST_SequenceBlk * query,Int4 s_off,Int4 q_off,Int4 dropoff,Int4 * hsp_q,Int4 * hsp_s,Int4 * hsp_len,Int4 word_size,Boolean use_pssm,Int4 * s_last_off)1020 s_BlastAaExtendOneHit(Int4 ** matrix,
1021                       const BLAST_SequenceBlk * subject,
1022                       const BLAST_SequenceBlk * query,
1023                       Int4 s_off, Int4 q_off,
1024                       Int4 dropoff, Int4 * hsp_q, Int4 * hsp_s,
1025                       Int4 * hsp_len, Int4 word_size,
1026                       Boolean use_pssm, Int4 * s_last_off)
1027 {
1028     Int4 score = 0, left_score, total_score, sum = 0;
1029     Int4 left_disp = 0, right_disp = 0;
1030     Int4 q_left_off = q_off, q_right_off =
1031         q_off + word_size, q_best_left_off = q_off;
1032     Int4 s_left_off, s_right_off;
1033     Int4 init_hit_width = 0;
1034     Int4 i;                     /* loop variable. */
1035     Uint1 *q = query->sequence;
1036     Uint1 *s = subject->sequence;
1037 
1038     for (i = 0; i < word_size; i++) {
1039         if (use_pssm)
1040             sum += matrix[q_off + i][s[s_off + i]];
1041         else
1042             sum += matrix[q[q_off + i]][s[s_off + i]];
1043 
1044         if (sum > score) {
1045             score = sum;
1046             q_best_left_off = q_left_off;
1047             q_right_off = q_off + i;
1048         } else if (sum <= 0) {
1049             sum = 0;
1050             q_left_off = q_off + i + 1;
1051         }
1052     }
1053 
1054     init_hit_width = q_right_off - q_left_off + 1;
1055 
1056     q_left_off = q_best_left_off;
1057 
1058     s_left_off = q_left_off + (s_off - q_off);
1059     s_right_off = q_right_off + (s_off - q_off);
1060 
1061     if (use_pssm) {
1062         left_score = s_BlastPSSMExtendLeft(matrix, subject,
1063                                            s_left_off - 1, q_left_off - 1,
1064                                            dropoff, &left_disp, score);
1065 
1066         total_score = s_BlastPSSMExtendRight(matrix, subject, query->length,
1067                                              s_right_off + 1, q_right_off + 1,
1068                                              dropoff, &right_disp, left_score,
1069                                              s_last_off);
1070     } else {
1071         left_score = s_BlastAaExtendLeft(matrix, subject, query,
1072                                          s_left_off - 1, q_left_off - 1,
1073                                          dropoff, &left_disp, score);
1074 
1075         total_score = s_BlastAaExtendRight(matrix, subject, query,
1076                                            s_right_off + 1, q_right_off + 1,
1077                                            dropoff, &right_disp, left_score,
1078                                            s_last_off);
1079     }
1080 
1081     *hsp_q = q_left_off - left_disp;
1082     *hsp_s = s_left_off - left_disp;
1083     *hsp_len = left_disp + right_disp + init_hit_width;
1084 
1085     return total_score;
1086 }
1087 
1088 static Int4
s_BlastAaExtendTwoHit(Int4 ** matrix,const BLAST_SequenceBlk * subject,const BLAST_SequenceBlk * query,Int4 s_left_off,Int4 s_right_off,Int4 q_right_off,Int4 dropoff,Int4 * hsp_q,Int4 * hsp_s,Int4 * hsp_len,Boolean use_pssm,Int4 word_size,Boolean * right_extend,Int4 * s_last_off)1089 s_BlastAaExtendTwoHit(Int4 ** matrix,
1090                       const BLAST_SequenceBlk * subject,
1091                       const BLAST_SequenceBlk * query,
1092                       Int4 s_left_off, Int4 s_right_off,
1093                       Int4 q_right_off, Int4 dropoff,
1094                       Int4 * hsp_q, Int4 * hsp_s, Int4 * hsp_len,
1095                       Boolean use_pssm, Int4 word_size,
1096                       Boolean * right_extend, Int4 * s_last_off)
1097 {
1098     Int4 left_d = 0, right_d = 0;       /* left and right displacements */
1099     Int4 left_score = 0, right_score = 0;       /* left and right scores */
1100     Int4 i, score = 0;
1101     Uint1 *s = subject->sequence;
1102     Uint1 *q = query->sequence;
1103 
1104     /* find one beyond the position (up to word_size-1 letters to the right)
1105        that gives the largest starting score */
1106     /* Use "one beyond" to make the numbering consistent with how it's done
1107        for BlastAaExtendOneHit and the "Extend" functions called here. */
1108     for (i = 0; i < word_size; i++) {
1109         if (use_pssm)
1110             score += matrix[q_right_off + i][s[s_right_off + i]];
1111         else
1112             score += matrix[q[q_right_off + i]][s[s_right_off + i]];
1113 
1114         if (score > left_score) {
1115             left_score = score;
1116             right_d = i + 1;    /* Position is one beyond the end of the
1117                                    word. */
1118         }
1119     }
1120     q_right_off += right_d;
1121     s_right_off += right_d;
1122 
1123     right_d = 0;
1124     *right_extend = FALSE;
1125     *s_last_off = s_right_off;
1126 
1127     /* first, try to extend left, from the second hit to the first hit. */
1128     if (use_pssm)
1129         left_score = s_BlastPSSMExtendLeft(matrix, subject,
1130                                            s_right_off - 1, q_right_off - 1,
1131                                            dropoff, &left_d, 0);
1132     else
1133         left_score = s_BlastAaExtendLeft(matrix, subject, query,
1134                                          s_right_off - 1, q_right_off - 1,
1135                                          dropoff, &left_d, 0);
1136 
1137     /* Extend to the right only if left extension reached the first hit. */
1138     if (left_d >= (s_right_off - s_left_off)) {
1139 
1140         *right_extend = TRUE;
1141         if (use_pssm)
1142             right_score = s_BlastPSSMExtendRight(matrix, subject, query->length,
1143                                                  s_right_off, q_right_off,
1144                                                  dropoff, &right_d, left_score,
1145                                                  s_last_off);
1146         else
1147             right_score = s_BlastAaExtendRight(matrix, subject, query,
1148                                                s_right_off, q_right_off,
1149                                                dropoff, &right_d, left_score,
1150                                                s_last_off);
1151     }
1152 
1153 
1154     *hsp_q = q_right_off - left_d;
1155     *hsp_s = s_right_off - left_d;
1156     *hsp_len = left_d + right_d;
1157     return MAX(left_score, right_score);
1158 }
1159