1 /* $Id: blast_sw.c 504861 2016-06-20 15:45:40Z boratyng $
2  * ===========================================================================
3  *
4  *                     PUBLIC DOMAIN NOTICE
5  *            National Center for Biotechnology Information
6  *
7  *  This software/database is a "United States Government Work" under the
8  *  terms of the United States Copyright Act.  It was written as part of
9  *  the author's official duties as a United States Government employee and
10  *  thus cannot be copyrighted.  This software/database is freely available
11  *  to the public for use. The National Library of Medicine and the U.S.
12  *  Government have not placed any restriction on its use or reproduction.
13  *
14  *  Although all reasonable efforts have been taken to ensure the accuracy
15  *  and reliability of the software and data, the NLM and the U.S.
16  *  Government do not and cannot warrant the performance or results that
17  *  may be obtained by using this software or data. The NLM and the U.S.
18  *  Government disclaim all warranties, express or implied, including
19  *  warranties of performance, merchantability or fitness for any particular
20  *  purpose.
21  *
22  *  Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author:  Jason Papadopoulos
27  *
28  */
29 
30 /** @file blast_sw.c
31  * Smith-Waterman gapped alignment, for use with infrastructure of BLAST
32  * @sa blast_sw.h
33  */
34 
35 #include <algo/blast/core/blast_sw.h>
36 #include <algo/blast/core/blast_util.h> /* for NCBI2NA_UNPACK_BASE */
37 
38 /** swap (pointers to) a pair of sequences */
39 #define SWAP_SEQS(A, B) {const Uint1 *tmp = (A); (A) = (B); (B) = tmp; }
40 
41 /** swap two integers */
42 #define SWAP_INT(A, B) {Int4 tmp = (A); (A) = (B); (B) = tmp; }
43 
44 /** Compute the score of the best local alignment between
45  *  two protein sequences. When using Smith-Waterman, the vast
46  *  majority of the runtime is tied up in this routine.
47  * @param A The first sequence [in]
48  * @param a_size Length of the first sequence [in]
49  * @param B The second sequence [in]
50  * @param b_size Length of the second sequence [in]
51  * @param gap_open Gap open penalty [in]
52  * @param gap_extend Gap extension penalty [in]
53  * @param gap_align Auxiliary data for gapped alignment
54  *             (used for score matrix info) [in]
55  * @return The score of the best local alignment between A and B
56  */
s_SmithWatermanScoreOnly(const Uint1 * A,Int4 a_size,const Uint1 * B,Int4 b_size,Int4 gap_open,Int4 gap_extend,BlastGapAlignStruct * gap_align)57 static Int4 s_SmithWatermanScoreOnly(const Uint1 *A, Int4 a_size,
58                             const Uint1 *B, Int4 b_size,
59                             Int4 gap_open, Int4 gap_extend,
60                             BlastGapAlignStruct *gap_align)
61 {
62    Int4 i, j;
63    Int4 **matrix;
64    Int4 *matrix_row;
65 
66    Int4 final_best_score;
67    Int4 best_score;
68    Int4 insert_score;
69    Int4 row_score;
70    BlastGapDP *scores;
71 
72    Boolean is_pssm = gap_align->positionBased;
73    Int4 gap_open_extend = gap_open + gap_extend;
74 
75    /* choose the score matrix */
76    if (is_pssm) {
77       matrix = gap_align->sbp->psi_matrix->pssm->data;
78    }
79    else {
80       /* for square score matrices, assume the matrix
81          is symmetric. This means that A and B can be
82          switched without changing the score, and this
83          saves memory if one sequence is large but the
84          other is not */
85       if (a_size < b_size) {
86          SWAP_SEQS(A, B);
87          SWAP_INT(a_size, b_size);
88       }
89       matrix = gap_align->sbp->matrix->data;
90    }
91 
92    /* allocate space for scratch structures */
93    if (b_size + 1 > gap_align->dp_mem_alloc) {
94       gap_align->dp_mem_alloc = MAX(b_size + 100,
95                              2 * gap_align->dp_mem_alloc);
96       sfree(gap_align->dp_mem);
97       gap_align->dp_mem = (BlastGapDP *)malloc(gap_align->dp_mem_alloc *
98                                                sizeof(BlastGapDP));
99    }
100    scores = gap_align->dp_mem;
101    memset(scores, 0, (b_size + 1) * sizeof(BlastGapDP));
102    final_best_score = 0;
103 
104 
105    for (i = 1; i <= a_size; i++) {
106 
107       if (is_pssm)
108          matrix_row = matrix[i-1];
109       else
110          matrix_row = matrix[A[i-1]];
111 
112       insert_score = 0;
113       row_score = 0;
114 
115       for (j = 1; j <= b_size; j++) {
116 
117          /* score of best alignment ending at (i,j) with gap in B */
118          best_score = scores[j].best_gap - gap_extend;
119          if (scores[j].best - gap_open_extend > best_score)
120             best_score = scores[j].best - gap_open_extend;
121          scores[j].best_gap = best_score;
122 
123          /* score of best alignment ending at (i,j) with gap in A */
124          best_score = insert_score - gap_extend;
125          if (row_score - gap_open_extend > best_score)
126             best_score = row_score - gap_open_extend;
127          insert_score = best_score;
128 
129          /* score of best alignment ending at (i,j) */
130          best_score = MAX(scores[j-1].best + matrix_row[B[j-1]], 0);
131          if (insert_score > best_score)
132             best_score = insert_score;
133          if (scores[j].best_gap > best_score)
134             best_score = scores[j].best_gap;
135 
136          if (best_score > final_best_score)
137             final_best_score = best_score;
138 
139          scores[j-1].best = row_score;
140          row_score = best_score;
141       }
142 
143       scores[j-1].best = row_score;
144    }
145 
146    return final_best_score;
147 }
148 
149 
150 /** Compute the score of the best local alignment between
151  *  two nucleotide sequences. One of the sequences must be in
152  *  packed format. For nucleotide Smith-Waterman, the vast
153  *  majority of the runtime is tied up in this routine.
154  * @param B The first sequence (must be in ncbi2na format) [in]
155  * @param b_size Length of the first sequence [in]
156  * @param A The second sequence [in]
157  * @param a_size Length of the second sequence [in]
158  * @param gap_open Gap open penalty [in]
159  * @param gap_extend Gap extension penalty [in]
160  * @param gap_align Auxiliary data for gapped alignment
161  *             (used for score matrix info) [in]
162  * @return The score of the best local alignment between A and B
163  */
s_NuclSmithWaterman(const Uint1 * B,Int4 b_size,const Uint1 * A,Int4 a_size,Int4 gap_open,Int4 gap_extend,BlastGapAlignStruct * gap_align)164 static Int4 s_NuclSmithWaterman(const Uint1 *B, Int4 b_size,
165                                 const Uint1 *A, Int4 a_size,
166                                 Int4 gap_open, Int4 gap_extend,
167                                 BlastGapAlignStruct *gap_align)
168 {
169    Int4 i, j;
170    Int4 **matrix;
171    Int4 *matrix_row;
172 
173    Int4 final_best_score;
174    Int4 best_score;
175    Int4 insert_score;
176    Int4 row_score;
177    BlastGapDP *scores;
178 
179    Int4 gap_open_extend = gap_open + gap_extend;
180 
181    /* position-specific scoring is not allowed, because
182       the loops below assume the score matrix is symmetric */
183    matrix = gap_align->sbp->matrix->data;
184 
185    if (a_size + 1 > gap_align->dp_mem_alloc) {
186       gap_align->dp_mem_alloc = MAX(a_size + 100,
187                              2 * gap_align->dp_mem_alloc);
188       sfree(gap_align->dp_mem);
189       gap_align->dp_mem = (BlastGapDP *)malloc(gap_align->dp_mem_alloc *
190                                                sizeof(BlastGapDP));
191    }
192    scores = gap_align->dp_mem;
193    memset(scores, 0, (a_size + 1) * sizeof(BlastGapDP));
194    final_best_score = 0;
195 
196 
197    for (i = 1; i <= b_size; i++) {
198 
199       /* The only real difference between this routine
200          and its unpacked counterpart is the choice of
201          score matrix row in the outer loop */
202       Int4 base_pair = NCBI2NA_UNPACK_BASE(B[(i-1)/4], (3-((i-1)%4)));
203       matrix_row = matrix[base_pair];
204       insert_score = 0;
205       row_score = 0;
206 
207       for (j = 1; j <= a_size; j++) {
208 
209          /* score of best alignment ending at (i,j) with gap in A */
210          best_score = scores[j].best_gap - gap_extend;
211          if (scores[j].best - gap_open_extend > best_score)
212             best_score = scores[j].best - gap_open_extend;
213          scores[j].best_gap = best_score;
214 
215          /* score of best alignment ending at (i,j) with gap in B */
216          best_score = insert_score - gap_extend;
217          if (row_score - gap_open_extend > best_score)
218             best_score = row_score - gap_open_extend;
219          insert_score = best_score;
220 
221          /* score of best alignment ending at (i,j) */
222          best_score = MAX(scores[j-1].best + matrix_row[A[j-1]], 0);
223          if (insert_score > best_score)
224             best_score = insert_score;
225          if (scores[j].best_gap > best_score)
226             best_score = scores[j].best_gap;
227 
228          if (best_score > final_best_score)
229             final_best_score = best_score;
230 
231          scores[j-1].best = row_score;
232          row_score = best_score;
233       }
234 
235       scores[j-1].best = row_score;
236    }
237 
238    return final_best_score;
239 }
240 
241 
242 /** Values for the editing script operations in traceback */
243 enum {
244    EDIT_SUB         = eGapAlignSub,    /**< Substitution */
245    EDIT_GAP_IN_A     = eGapAlignDel,    /**< Deletion */
246    EDIT_GAP_IN_B     = eGapAlignIns,    /**< Insertion */
247    EDIT_OP_MASK      = 0x07, /**< Mask for edit script operations */
248 
249    EDIT_START_GAP_A  = 0x10, /**< open a gap in A */
250    EDIT_START_GAP_B  = 0x20  /**< open a gap in B */
251 };
252 
253 
254 /** Generate the traceback for a single local alignment
255  *  between two (unpacked) sequences, then create an HSP
256  *  for the alignment and add to a list of such HSPs
257  * @param program_number Blast program requesting traceback [in]
258  * @param trace 2-D array of edit actions, size (a_size+1) x (b_size+1) [in]
259  * @param A The first sequence [in]
260  * @param B The second sequence [in]
261  * @param b_size Length of the second sequence [in]
262  * @param gap_open Gap open penalty [in]
263  * @param gap_extend Gap extension penalty [in]
264  * @param gap_align Auxiliary data for gapped alignment
265  *             (used for score matrix info) [in]
266  * @param a_end The alignment end offset on A (plus one) [in]
267  * @param b_end The alignment end offset on B (plus one) [in]
268  * @param best_score Score of the alignment [in]
269  * @param hsp_list Collection of alignments found so far [in][out]
270  * @param swapped TRUE if A and B were swapped before the alignment
271  *               was found [in]
272  * @param template_hsp Placeholder alignment, used only to
273  *               determine contexts and frames [in]
274  * @param score_options Structure containing gap penalties [in]
275  * @param hit_options Structure used for percent identity calculation [in]
276  * @param start_shift Bias to be applied to subject offsets [in]
277  */
s_GetTraceback(EBlastProgramType program_number,Uint1 * trace,const Uint1 * A,const Uint1 * B,Int4 b_size,Int4 gap_open,Int4 gap_extend,BlastGapAlignStruct * gap_align,Int4 a_end,Int4 b_end,Int4 best_score,BlastHSPList * hsp_list,Boolean swapped,BlastHSP * template_hsp,const BlastScoringOptions * score_options,const BlastHitSavingOptions * hit_options,Int4 start_shift)278 static void s_GetTraceback(EBlastProgramType program_number,
279                            Uint1 *trace, const Uint1 *A, const Uint1 *B, Int4 b_size,
280                            Int4 gap_open, Int4 gap_extend,
281                            BlastGapAlignStruct *gap_align,
282                            Int4 a_end, Int4 b_end, Int4 best_score,
283                            BlastHSPList *hsp_list, Boolean swapped,
284                            BlastHSP *template_hsp,
285                            const BlastScoringOptions *score_options,
286                            const BlastHitSavingOptions *hit_options,
287                            Int4 start_shift)
288 {
289    Int4 i, j;
290    Uint1 script;
291    Uint1 next_action;
292    Uint1 *traceback_row;
293    Int4 a_start, b_start;
294    Int4 **matrix;
295    Int4 curr_score = -best_score;
296    Boolean is_pssm = gap_align->positionBased;
297    GapPrelimEditBlock *prelim_tback = gap_align->fwd_prelim_tback;
298    GapEditScript *final_tback;
299    BlastHSP *new_hsp;
300 
301    i = a_end;
302    j = b_end;
303    traceback_row = trace + i * (b_size + 1);
304    script = traceback_row[j] & EDIT_OP_MASK;
305    GapPrelimEditBlockReset(prelim_tback);
306 
307    if (is_pssm)
308       matrix = gap_align->sbp->psi_matrix->pssm->data;
309    else
310       matrix = gap_align->sbp->matrix->data;
311 
312    /* Only the start point of the alignment is unknown, but
313       there was no bookkeeping performed to remember where the
314       start point is located. We know the list of traceback
315       actions backwards from (i,j) and know the score of the
316       alignment, so the start point will be the offset pair where
317       the score becomes zero after edit operations are applied */
318 
319    while (curr_score != 0) {
320 
321       next_action = traceback_row[j];
322       GapPrelimEditBlockAdd(prelim_tback, (EGapAlignOpType)script, 1);
323 
324       switch(script) {
325       case EDIT_SUB:
326          if (is_pssm)
327             curr_score += matrix[i-1][B[j-1]];
328          else
329             curr_score += matrix[A[i-1]][B[j-1]];
330 
331          i--; j--;
332          traceback_row -= b_size + 1;
333          script = traceback_row[j] & EDIT_OP_MASK;
334          break;
335       case EDIT_GAP_IN_A:
336          j--;
337          if (next_action & EDIT_START_GAP_A) {
338             script = traceback_row[j] & EDIT_OP_MASK;
339             curr_score -= gap_open;
340          }
341          curr_score -= gap_extend;
342          break;
343       case EDIT_GAP_IN_B:
344          i--;
345          traceback_row -= b_size + 1;
346          if (next_action & EDIT_START_GAP_B) {
347             script = traceback_row[j] & EDIT_OP_MASK;
348             curr_score -= gap_open;
349          }
350          curr_score -= gap_extend;
351          break;
352       }
353    }
354 
355    /* found the start point. Now create the final (reversed)
356       edit script; if A and B were swapped before calling
357       this routine, swap them back */
358    a_start = i;
359    b_start = j;
360    final_tback = GapEditScriptNew(prelim_tback->num_ops);
361    for (i = prelim_tback->num_ops - 1, j = 0; i >= 0; i--, j++) {
362       GapPrelimEditScript *p = prelim_tback->edit_ops + i;
363       final_tback->num[j] = p->num;
364       final_tback->op_type[j] = p->op_type;
365       if (swapped) {
366          if (p->op_type == eGapAlignIns)
367             final_tback->op_type[j] = eGapAlignDel;
368          else if (p->op_type == eGapAlignDel)
369             final_tback->op_type[j] = eGapAlignIns;
370       }
371    }
372 
373    if (swapped) {
374       SWAP_SEQS(A, B);
375       SWAP_INT(a_start, b_start);
376       SWAP_INT(a_end, b_end);
377    }
378 
379    /* construct an HSP, verify it meets length and percent
380       identity criteria, and save it */
381    Blast_HSPInit(a_start, a_end, b_start, b_end,
382                  a_start, b_start, template_hsp->context,
383                  template_hsp->query.frame,
384                  template_hsp->subject.frame, best_score,
385                  &final_tback, &new_hsp);
386 
387    if (Blast_HSPTestIdentityAndLength(program_number, new_hsp, A, B,
388                                       score_options, hit_options)) {
389       Blast_HSPFree(new_hsp);
390    }
391    else {
392       Blast_HSPAdjustSubjectOffset(new_hsp, start_shift);
393       Blast_HSPListSaveHSP(hsp_list, new_hsp);
394    }
395 }
396 
397 
398 /** Auxiliary structures for Smith-Waterman alignment
399  *  with traceback
400  */
401 typedef struct BlastGapSW{
402    Int4 best;        /**< Score of best alignment at this position */
403    Int4 best_gap;     /**< Score of best alignment at this position that
404                         ends in a gap */
405    Int4 path_score;   /**< The highest score that the alignment at this
406                         position has previously achieved */
407    Int4 path_stop_i;   /**< Offset (plus one) on the first sequence where
408                         path_score occurs */
409    Int4 path_stop_j;   /**< Offset (plus one) on the second sequence where
410                         path_score occurs */
411 } BlastGapSW;
412 
413 /* See blast_sw.h for details */
SmithWatermanScoreWithTraceback(EBlastProgramType program_number,const Uint1 * A,Int4 a_size,const Uint1 * B,Int4 b_size,BlastHSP * template_hsp,BlastHSPList * hsp_list,const BlastScoringParameters * score_params,const BlastHitSavingParameters * hit_params,BlastGapAlignStruct * gap_align,Int4 start_shift,Int4 cutoff)414 void SmithWatermanScoreWithTraceback(EBlastProgramType program_number,
415                                      const Uint1 *A, Int4 a_size,
416                                      const Uint1 *B, Int4 b_size,
417                                      BlastHSP *template_hsp,
418                                      BlastHSPList *hsp_list,
419                                      const BlastScoringParameters *score_params,
420                                      const BlastHitSavingParameters *hit_params,
421                                      BlastGapAlignStruct *gap_align,
422                                      Int4 start_shift, Int4 cutoff)
423 {
424    Int4 i, j;
425    Int4 *matrix_row;
426    Int4 **matrix;
427    Boolean swapped = FALSE;
428 
429    Int4 best_score;
430    Int4 insert_score;
431    Int4 row_score;
432    Int4 row_path_score;
433    Int4 row_path_stop_i;
434    Int4 row_path_stop_j;
435    Int4 new_path_score;
436    Int4 new_path_stop_i;
437    Int4 new_path_stop_j;
438    BlastGapSW *scores;
439 
440    Boolean is_pssm = gap_align->positionBased;
441    Int4 gap_open = score_params->gap_open;
442    Int4 gap_extend = score_params->gap_extend;
443    Int4 gap_open_extend = gap_open + gap_extend;
444 
445    Uint1 *traceback_array;
446    Uint1 *traceback_row;
447    Uint1 script;
448 
449    /* choose the score matrix */
450    if (is_pssm) {
451       matrix = gap_align->sbp->psi_matrix->pssm->data;
452    }
453    else {
454       /* for square score matrices, assume the matrix
455          is symmetric. This means that A and B can be
456          switched without changing the score, and this
457          saves memory if one sequence is large but the
458          other is not */
459       if (a_size < b_size) {
460          swapped = TRUE;
461          SWAP_SEQS(A, B);
462          SWAP_INT(a_size, b_size);
463       }
464       matrix = gap_align->sbp->matrix->data;
465    }
466 
467    /* allocate space for scratch structures */
468    scores = (BlastGapSW *)calloc(b_size + 1, sizeof(BlastGapSW));
469    traceback_array = (Uint1 *)malloc((a_size + 1) * (b_size + 1) *
470                                      sizeof(Uint1));
471    traceback_row = traceback_array;
472    for (i = 0; i <= b_size; i++)
473       traceback_row[i] = EDIT_GAP_IN_A;
474    traceback_row += b_size + 1;
475 
476    for (i = 1; i <= a_size; i++) {
477 
478       if (is_pssm)
479          matrix_row = matrix[i-1];
480       else
481          matrix_row = matrix[A[i-1]];
482 
483       insert_score = 0;
484       row_score = 0;
485       row_path_stop_i = 0;
486       row_path_stop_j = 0;
487       row_path_score = 0;
488       traceback_row[0] = EDIT_GAP_IN_B;
489 
490       for (j = 1; j <= b_size; j++) {
491 
492          /* score of best alignment ending at (i,j) with gap in B */
493          best_score = scores[j].best_gap - gap_extend;
494          script = 0;
495          if (scores[j].best - gap_open_extend > best_score) {
496             script |= EDIT_START_GAP_B;
497             best_score = scores[j].best - gap_open_extend;
498          }
499          scores[j].best_gap = best_score;
500 
501          /* score of best alignment ending at (i,j) with gap in A */
502          best_score = insert_score - gap_extend;
503          if (row_score - gap_open_extend > best_score) {
504             script |= EDIT_START_GAP_A;
505             best_score = row_score - gap_open_extend;
506          }
507          insert_score = best_score;
508 
509          /* Every cell computed by Smith-Waterman lies on exactly
510             one highest-scoring path. This means that at any given
511             time there are only b_size possible paths that need
512             bookkeeping information. In addition to the 3-way
513             maximum needed to choose the highest score, we also
514             need to choose the path that (i,j) extends. Begin by
515             assuming (i,j) extends the substitution path */
516          best_score = MAX(scores[j-1].best + matrix_row[B[j-1]], 0);
517          traceback_row[j] = script | EDIT_SUB;
518          new_path_score = scores[j-1].path_score;
519          new_path_stop_i = scores[j-1].path_stop_i;
520          new_path_stop_j = scores[j-1].path_stop_j;
521 
522          /* switch to the insertion or deletion path, if one
523             of these has the highest overall score */
524          if (insert_score > best_score) {
525             best_score = insert_score;
526             traceback_row[j] = script | EDIT_GAP_IN_A;
527 
528             new_path_score = row_path_score;
529             new_path_stop_i = row_path_stop_i;
530             new_path_stop_j = row_path_stop_j;
531          }
532          if (scores[j].best_gap >= best_score) {
533             best_score = scores[j].best_gap;
534             traceback_row[j] = script | EDIT_GAP_IN_B;
535 
536             new_path_score = scores[j].path_score;
537             new_path_stop_i = scores[j].path_stop_i;
538             new_path_stop_j = scores[j].path_stop_j;
539          }
540 
541          if (best_score == 0) {
542             /* the score of the path extended by (i,j) has
543                decayed to zero, meaning this path can never
544                be extended again. Before proceeding to the
545                next cell and forgetting about this path, check
546                whether the highest score previously achieved
547                by this path exceeds the cutoff for the overall
548                search, and if so then recover the alignment for
549                the current path right now */
550             if (new_path_score >= cutoff) {
551                s_GetTraceback(program_number, traceback_array,
552                               A, B, b_size,
553                               gap_open, gap_extend,
554                               gap_align, new_path_stop_i,
555                               new_path_stop_j, new_path_score,
556                               hsp_list, swapped, template_hsp,
557                               score_params->options,
558                               hit_params->options, start_shift);
559             }
560             new_path_score = 0;
561          }
562 
563          /* check if (i,j) is a new local maximum score
564             for this path */
565          if (best_score > new_path_score) {
566             new_path_score = best_score;
567             new_path_stop_i = i;
568             new_path_stop_j = j;
569          }
570 
571          /* save path information */
572          scores[j-1].best = row_score;
573          scores[j-1].path_score = row_path_score;
574          scores[j-1].path_stop_i = row_path_stop_i;
575          scores[j-1].path_stop_j = row_path_stop_j;
576 
577          row_score = best_score;
578          row_path_score = new_path_score;
579          row_path_stop_i = new_path_stop_i;
580          row_path_stop_j = new_path_stop_j;
581       }
582 
583       /* save path information for the last cell in row i */
584       scores[j-1].best = row_score;
585       scores[j-1].path_score = row_path_score;
586       scores[j-1].path_stop_i = row_path_stop_i;
587       scores[j-1].path_stop_j = row_path_stop_j;
588 
589       /* the last cell may not have decayed to zero, but the
590          best score of the path containing it can still exceed
591          the cutoff. Recover the alignment if this is the case;
592          it is the last chance to do so */
593       if (scores[j-1].path_score >= cutoff) {
594          s_GetTraceback(program_number, traceback_array,
595                         A, B, b_size,
596                         gap_open, gap_extend,
597                         gap_align, scores[j-1].path_stop_i,
598                         scores[j-1].path_stop_j,
599                         scores[j-1].path_score,
600                         hsp_list, swapped, template_hsp,
601                         score_params->options,
602                         hit_params->options, start_shift);
603       }
604       traceback_row += b_size + 1;
605    }
606 
607    /* finally, check the last row again for paths that
608       have not decayed to zero */
609    for (i = 0; i < b_size; i++) {
610       if (scores[i].best && scores[i].path_score >= cutoff) {
611          s_GetTraceback(program_number, traceback_array,
612                         A, B, b_size,
613                         gap_open, gap_extend,
614                         gap_align, scores[i].path_stop_i,
615                         scores[i].path_stop_j,
616                         scores[i].path_score,
617                         hsp_list, swapped, template_hsp,
618                         score_params->options,
619                         hit_params->options, start_shift);
620       }
621    }
622 
623    free(scores);
624    free(traceback_array);
625 }
626 
627 
628 /* See blast_sw.h for details */
BLAST_SmithWatermanGetGappedScore(EBlastProgramType program_number,BLAST_SequenceBlk * query,BlastQueryInfo * query_info,BLAST_SequenceBlk * subject,BlastGapAlignStruct * gap_align,const BlastScoringParameters * score_params,const BlastExtensionParameters * ext_params,const BlastHitSavingParameters * hit_params,BlastInitHitList * init_hitlist,BlastHSPList ** hsp_list_ptr,BlastGappedStats * gapped_stats,Boolean * fence_hit)629 Int2 BLAST_SmithWatermanGetGappedScore (EBlastProgramType program_number,
630         BLAST_SequenceBlk* query, BlastQueryInfo* query_info,
631         BLAST_SequenceBlk* subject,
632         BlastGapAlignStruct* gap_align,
633         const BlastScoringParameters* score_params,
634         const BlastExtensionParameters* ext_params,
635         const BlastHitSavingParameters* hit_params,
636         BlastInitHitList* init_hitlist,
637         BlastHSPList** hsp_list_ptr, BlastGappedStats* gapped_stats,
638         Boolean * fence_hit)
639 {
640    Boolean is_prot;
641    BlastHSPList* hsp_list = NULL;
642    const BlastHitSavingOptions* hit_options = hit_params->options;
643    Int4 cutoff_score = 0;
644    Int4 context;
645    Int4 **rpsblast_pssms = NULL;   /* Pointer to concatenated PSSMs in
646                                        RPS-BLAST database */
647    const int kHspNumMax = BlastHspNumMax(TRUE, hit_options);
648 
649    if (!query || !subject || !gap_align || !score_params || !ext_params ||
650        !hit_params || !init_hitlist || !hsp_list_ptr)
651       return 1;
652 
653    is_prot = (program_number != eBlastTypeBlastn &&
654               program_number != eBlastTypePhiBlastn &&
655               program_number != eBlastTypeMapping);
656 
657    if (Blast_ProgramIsRpsBlast(program_number)) {
658       Int4 rps_context = subject->oid;
659       rpsblast_pssms = gap_align->sbp->psi_matrix->pssm->data;
660       if (program_number == eBlastTypeRpsTblastn) {
661          rps_context = rps_context * NUM_FRAMES +
662                       BLAST_FrameToContext(subject->frame, program_number);
663       }
664       /* only one cutoff applies to an RPS search */
665       cutoff_score = hit_params->cutoffs[rps_context].cutoff_score;
666    }
667 
668    if (*hsp_list_ptr == NULL)
669       *hsp_list_ptr = hsp_list = Blast_HSPListNew(kHspNumMax);
670    else
671       hsp_list = *hsp_list_ptr;
672 
673    /* ignore any initial ungapped alignments; just search
674       all contexts against the subject sequence, one context
675       at a time */
676    for (context = query_info->first_context;
677                 context <= query_info->last_context; context++) {
678 
679       BlastContextInfo *curr_ctx = query_info->contexts + context;
680       Int4 score;
681       BlastHSP* new_hsp;
682 
683       if (!curr_ctx->is_valid)
684          continue;
685 
686       if (rpsblast_pssms) {
687          gap_align->sbp->psi_matrix->pssm->data =
688                                 rpsblast_pssms + curr_ctx->query_offset;
689       }
690       else {
691          cutoff_score = hit_params->cutoffs[context].cutoff_score;
692       }
693 
694       if (is_prot) {
695          score = s_SmithWatermanScoreOnly(
696                               query->sequence + curr_ctx->query_offset,
697                               curr_ctx->query_length,
698                               subject->sequence,
699                               subject->length,
700                               score_params->gap_open,
701                               score_params->gap_extend,
702                               gap_align);
703       }
704       else {
705          score = s_NuclSmithWaterman(subject->sequence,
706                                      subject->length,
707                                      query->sequence + curr_ctx->query_offset,
708                                      curr_ctx->query_length,
709                                      score_params->gap_open,
710                                      score_params->gap_extend,
711                                      gap_align);
712       }
713 
714       if (score >= cutoff_score) {
715          /* we know the score of the highest-scoring alignment but
716             not its boundaries. Use a single placeholder HSP to carry
717             score, context and frame information to the traceback phase */
718          Blast_HSPInit(0, curr_ctx->query_length, 0, subject->length, 0, 0,
719                        context, curr_ctx->frame, subject->frame, score,
720                        NULL, &new_hsp);
721          Blast_HSPListSaveHSP(hsp_list, new_hsp);
722       }
723    }
724 
725    if (rpsblast_pssms)
726        gap_align->sbp->psi_matrix->pssm->data = rpsblast_pssms;
727 
728    *hsp_list_ptr = hsp_list;
729    return 0;
730 }
731