1 /* $Id: blast_sw.c,v 1.8 2016/06/20 15:49:13 fukanchi Exp $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Jason Papadopoulos
27 *
28 */
29
30 /** @file blast_sw.c
31 * Smith-Waterman gapped alignment, for use with infrastructure of BLAST
32 * @sa blast_sw.h
33 */
34
35 #include <algo/blast/core/blast_sw.h>
36 #include <algo/blast/core/blast_util.h> /* for NCBI2NA_UNPACK_BASE */
37
38 /** swap (pointers to) a pair of sequences */
39 #define SWAP_SEQS(A, B) {const Uint1 *tmp = (A); (A) = (B); (B) = tmp; }
40
41 /** swap two integers */
42 #define SWAP_INT(A, B) {Int4 tmp = (A); (A) = (B); (B) = tmp; }
43
44 /** Compute the score of the best local alignment between
45 * two protein sequences. When using Smith-Waterman, the vast
46 * majority of the runtime is tied up in this routine.
47 * @param A The first sequence [in]
48 * @param a_size Length of the first sequence [in]
49 * @param B The second sequence [in]
50 * @param b_size Length of the second sequence [in]
51 * @param gap_open Gap open penalty [in]
52 * @param gap_extend Gap extension penalty [in]
53 * @param gap_align Auxiliary data for gapped alignment
54 * (used for score matrix info) [in]
55 * @return The score of the best local alignment between A and B
56 */
s_SmithWatermanScoreOnly(const Uint1 * A,Int4 a_size,const Uint1 * B,Int4 b_size,Int4 gap_open,Int4 gap_extend,BlastGapAlignStruct * gap_align)57 static Int4 s_SmithWatermanScoreOnly(const Uint1 *A, Int4 a_size,
58 const Uint1 *B, Int4 b_size,
59 Int4 gap_open, Int4 gap_extend,
60 BlastGapAlignStruct *gap_align)
61 {
62 Int4 i, j;
63 Int4 **matrix;
64 Int4 *matrix_row;
65
66 Int4 final_best_score;
67 Int4 best_score;
68 Int4 insert_score;
69 Int4 row_score;
70 BlastGapDP *scores;
71
72 Boolean is_pssm = gap_align->positionBased;
73 Int4 gap_open_extend = gap_open + gap_extend;
74
75 /* choose the score matrix */
76 if (is_pssm) {
77 matrix = gap_align->sbp->psi_matrix->pssm->data;
78 }
79 else {
80 /* for square score matrices, assume the matrix
81 is symmetric. This means that A and B can be
82 switched without changing the score, and this
83 saves memory if one sequence is large but the
84 other is not */
85 if (a_size < b_size) {
86 SWAP_SEQS(A, B);
87 SWAP_INT(a_size, b_size);
88 }
89 matrix = gap_align->sbp->matrix->data;
90 }
91
92 /* allocate space for scratch structures */
93 if (b_size + 1 > gap_align->dp_mem_alloc) {
94 gap_align->dp_mem_alloc = MAX(b_size + 100,
95 2 * gap_align->dp_mem_alloc);
96 sfree(gap_align->dp_mem);
97 gap_align->dp_mem = (BlastGapDP *)malloc(gap_align->dp_mem_alloc *
98 sizeof(BlastGapDP));
99 }
100 scores = gap_align->dp_mem;
101 memset(scores, 0, (b_size + 1) * sizeof(BlastGapDP));
102 final_best_score = 0;
103
104
105 for (i = 1; i <= a_size; i++) {
106
107 if (is_pssm)
108 matrix_row = matrix[i-1];
109 else
110 matrix_row = matrix[A[i-1]];
111
112 insert_score = 0;
113 row_score = 0;
114
115 for (j = 1; j <= b_size; j++) {
116
117 /* score of best alignment ending at (i,j) with gap in B */
118 best_score = scores[j].best_gap - gap_extend;
119 if (scores[j].best - gap_open_extend > best_score)
120 best_score = scores[j].best - gap_open_extend;
121 scores[j].best_gap = best_score;
122
123 /* score of best alignment ending at (i,j) with gap in A */
124 best_score = insert_score - gap_extend;
125 if (row_score - gap_open_extend > best_score)
126 best_score = row_score - gap_open_extend;
127 insert_score = best_score;
128
129 /* score of best alignment ending at (i,j) */
130 best_score = MAX(scores[j-1].best + matrix_row[B[j-1]], 0);
131 if (insert_score > best_score)
132 best_score = insert_score;
133 if (scores[j].best_gap > best_score)
134 best_score = scores[j].best_gap;
135
136 if (best_score > final_best_score)
137 final_best_score = best_score;
138
139 scores[j-1].best = row_score;
140 row_score = best_score;
141 }
142
143 scores[j-1].best = row_score;
144 }
145
146 return final_best_score;
147 }
148
149
150 /** Compute the score of the best local alignment between
151 * two nucleotide sequences. One of the sequences must be in
152 * packed format. For nucleotide Smith-Waterman, the vast
153 * majority of the runtime is tied up in this routine.
154 * @param B The first sequence (must be in ncbi2na format) [in]
155 * @param b_size Length of the first sequence [in]
156 * @param A The second sequence [in]
157 * @param a_size Length of the second sequence [in]
158 * @param gap_open Gap open penalty [in]
159 * @param gap_extend Gap extension penalty [in]
160 * @param gap_align Auxiliary data for gapped alignment
161 * (used for score matrix info) [in]
162 * @return The score of the best local alignment between A and B
163 */
s_NuclSmithWaterman(const Uint1 * B,Int4 b_size,const Uint1 * A,Int4 a_size,Int4 gap_open,Int4 gap_extend,BlastGapAlignStruct * gap_align)164 static Int4 s_NuclSmithWaterman(const Uint1 *B, Int4 b_size,
165 const Uint1 *A, Int4 a_size,
166 Int4 gap_open, Int4 gap_extend,
167 BlastGapAlignStruct *gap_align)
168 {
169 Int4 i, j;
170 Int4 **matrix;
171 Int4 *matrix_row;
172
173 Int4 final_best_score;
174 Int4 best_score;
175 Int4 insert_score;
176 Int4 row_score;
177 BlastGapDP *scores;
178
179 Int4 gap_open_extend = gap_open + gap_extend;
180
181 /* position-specific scoring is not allowed, because
182 the loops below assume the score matrix is symmetric */
183 matrix = gap_align->sbp->matrix->data;
184
185 if (a_size + 1 > gap_align->dp_mem_alloc) {
186 gap_align->dp_mem_alloc = MAX(a_size + 100,
187 2 * gap_align->dp_mem_alloc);
188 sfree(gap_align->dp_mem);
189 gap_align->dp_mem = (BlastGapDP *)malloc(gap_align->dp_mem_alloc *
190 sizeof(BlastGapDP));
191 }
192 scores = gap_align->dp_mem;
193 memset(scores, 0, (a_size + 1) * sizeof(BlastGapDP));
194 final_best_score = 0;
195
196
197 for (i = 1; i <= b_size; i++) {
198
199 /* The only real difference between this routine
200 and its unpacked counterpart is the choice of
201 score matrix row in the outer loop */
202 Int4 base_pair = NCBI2NA_UNPACK_BASE(B[(i-1)/4], (3-((i-1)%4)));
203 matrix_row = matrix[base_pair];
204 insert_score = 0;
205 row_score = 0;
206
207 for (j = 1; j <= a_size; j++) {
208
209 /* score of best alignment ending at (i,j) with gap in A */
210 best_score = scores[j].best_gap - gap_extend;
211 if (scores[j].best - gap_open_extend > best_score)
212 best_score = scores[j].best - gap_open_extend;
213 scores[j].best_gap = best_score;
214
215 /* score of best alignment ending at (i,j) with gap in B */
216 best_score = insert_score - gap_extend;
217 if (row_score - gap_open_extend > best_score)
218 best_score = row_score - gap_open_extend;
219 insert_score = best_score;
220
221 /* score of best alignment ending at (i,j) */
222 best_score = MAX(scores[j-1].best + matrix_row[A[j-1]], 0);
223 if (insert_score > best_score)
224 best_score = insert_score;
225 if (scores[j].best_gap > best_score)
226 best_score = scores[j].best_gap;
227
228 if (best_score > final_best_score)
229 final_best_score = best_score;
230
231 scores[j-1].best = row_score;
232 row_score = best_score;
233 }
234
235 scores[j-1].best = row_score;
236 }
237
238 return final_best_score;
239 }
240
241
242 /** Values for the editing script operations in traceback */
243 enum {
244 EDIT_SUB = eGapAlignSub, /**< Substitution */
245 EDIT_GAP_IN_A = eGapAlignDel, /**< Deletion */
246 EDIT_GAP_IN_B = eGapAlignIns, /**< Insertion */
247 EDIT_OP_MASK = 0x07, /**< Mask for edit script operations */
248
249 EDIT_START_GAP_A = 0x10, /**< open a gap in A */
250 EDIT_START_GAP_B = 0x20 /**< open a gap in B */
251 };
252
253
254 /** Generate the traceback for a single local alignment
255 * between two (unpacked) sequences, then create an HSP
256 * for the alignment and add to a list of such HSPs
257 * @param program_number Blast program requesting traceback [in]
258 * @param trace 2-D array of edit actions, size (a_size+1) x (b_size+1) [in]
259 * @param A The first sequence [in]
260 * @param B The second sequence [in]
261 * @param b_size Length of the second sequence [in]
262 * @param gap_open Gap open penalty [in]
263 * @param gap_extend Gap extension penalty [in]
264 * @param gap_align Auxiliary data for gapped alignment
265 * (used for score matrix info) [in]
266 * @param a_end The alignment end offset on A (plus one) [in]
267 * @param b_end The alignment end offset on B (plus one) [in]
268 * @param best_score Score of the alignment [in]
269 * @param hsp_list Collection of alignments found so far [in][out]
270 * @param swapped TRUE if A and B were swapped before the alignment
271 * was found [in]
272 * @param template_hsp Placeholder alignment, used only to
273 * determine contexts and frames [in]
274 * @param score_options Structure containing gap penalties [in]
275 * @param hit_options Structure used for percent identity calculation [in]
276 * @param start_shift Bias to be applied to subject offsets [in]
277 */
s_GetTraceback(EBlastProgramType program_number,Uint1 * trace,const Uint1 * A,const Uint1 * B,Int4 b_size,Int4 gap_open,Int4 gap_extend,BlastGapAlignStruct * gap_align,Int4 a_end,Int4 b_end,Int4 best_score,BlastHSPList * hsp_list,Boolean swapped,BlastHSP * template_hsp,const BlastScoringOptions * score_options,const BlastHitSavingOptions * hit_options,Int4 start_shift)278 static void s_GetTraceback(EBlastProgramType program_number,
279 Uint1 *trace, const Uint1 *A, const Uint1 *B, Int4 b_size,
280 Int4 gap_open, Int4 gap_extend,
281 BlastGapAlignStruct *gap_align,
282 Int4 a_end, Int4 b_end, Int4 best_score,
283 BlastHSPList *hsp_list, Boolean swapped,
284 BlastHSP *template_hsp,
285 const BlastScoringOptions *score_options,
286 const BlastHitSavingOptions *hit_options,
287 Int4 start_shift)
288 {
289 Int4 i, j;
290 Uint1 script;
291 Uint1 next_action;
292 Uint1 *traceback_row;
293 Int4 a_start, b_start;
294 Int4 **matrix;
295 Int4 curr_score = -best_score;
296 Boolean is_pssm = gap_align->positionBased;
297 GapPrelimEditBlock *prelim_tback = gap_align->fwd_prelim_tback;
298 GapEditScript *final_tback;
299 BlastHSP *new_hsp;
300
301 i = a_end;
302 j = b_end;
303 traceback_row = trace + i * (b_size + 1);
304 script = traceback_row[j] & EDIT_OP_MASK;
305 GapPrelimEditBlockReset(prelim_tback);
306
307 if (is_pssm)
308 matrix = gap_align->sbp->psi_matrix->pssm->data;
309 else
310 matrix = gap_align->sbp->matrix->data;
311
312 /* Only the start point of the alignment is unknown, but
313 there was no bookkeeping performed to remember where the
314 start point is located. We know the list of traceback
315 actions backwards from (i,j) and know the score of the
316 alignment, so the start point will be the offset pair where
317 the score becomes zero after edit operations are applied */
318
319 while (curr_score != 0) {
320
321 next_action = traceback_row[j];
322 GapPrelimEditBlockAdd(prelim_tback, (EGapAlignOpType)script, 1);
323
324 switch(script) {
325 case EDIT_SUB:
326 if (is_pssm)
327 curr_score += matrix[i-1][B[j-1]];
328 else
329 curr_score += matrix[A[i-1]][B[j-1]];
330
331 i--; j--;
332 traceback_row -= b_size + 1;
333 script = traceback_row[j] & EDIT_OP_MASK;
334 break;
335 case EDIT_GAP_IN_A:
336 j--;
337 if (next_action & EDIT_START_GAP_A) {
338 script = traceback_row[j] & EDIT_OP_MASK;
339 curr_score -= gap_open;
340 }
341 curr_score -= gap_extend;
342 break;
343 case EDIT_GAP_IN_B:
344 i--;
345 traceback_row -= b_size + 1;
346 if (next_action & EDIT_START_GAP_B) {
347 script = traceback_row[j] & EDIT_OP_MASK;
348 curr_score -= gap_open;
349 }
350 curr_score -= gap_extend;
351 break;
352 }
353 }
354
355 /* found the start point. Now create the final (reversed)
356 edit script; if A and B were swapped before calling
357 this routine, swap them back */
358 a_start = i;
359 b_start = j;
360 final_tback = GapEditScriptNew(prelim_tback->num_ops);
361 for (i = prelim_tback->num_ops - 1, j = 0; i >= 0; i--, j++) {
362 GapPrelimEditScript *p = prelim_tback->edit_ops + i;
363 final_tback->num[j] = p->num;
364 final_tback->op_type[j] = p->op_type;
365 if (swapped) {
366 if (p->op_type == eGapAlignIns)
367 final_tback->op_type[j] = eGapAlignDel;
368 else if (p->op_type == eGapAlignDel)
369 final_tback->op_type[j] = eGapAlignIns;
370 }
371 }
372
373 if (swapped) {
374 SWAP_SEQS(A, B);
375 SWAP_INT(a_start, b_start);
376 SWAP_INT(a_end, b_end);
377 }
378
379 /* construct an HSP, verify it meets length and percent
380 identity criteria, and save it */
381 Blast_HSPInit(a_start, a_end, b_start, b_end,
382 a_start, b_start, template_hsp->context,
383 template_hsp->query.frame,
384 template_hsp->subject.frame, best_score,
385 &final_tback, &new_hsp);
386
387 if (Blast_HSPTestIdentityAndLength(program_number, new_hsp, A, B,
388 score_options, hit_options)) {
389 Blast_HSPFree(new_hsp);
390 }
391 else {
392 Blast_HSPAdjustSubjectOffset(new_hsp, start_shift);
393 Blast_HSPListSaveHSP(hsp_list, new_hsp);
394 }
395 }
396
397
398 /** Auxiliary structures for Smith-Waterman alignment
399 * with traceback
400 */
401 typedef struct BlastGapSW{
402 Int4 best; /**< Score of best alignment at this position */
403 Int4 best_gap; /**< Score of best alignment at this position that
404 ends in a gap */
405 Int4 path_score; /**< The highest score that the alignment at this
406 position has previously achieved */
407 Int4 path_stop_i; /**< Offset (plus one) on the first sequence where
408 path_score occurs */
409 Int4 path_stop_j; /**< Offset (plus one) on the second sequence where
410 path_score occurs */
411 } BlastGapSW;
412
413 /* See blast_sw.h for details */
SmithWatermanScoreWithTraceback(EBlastProgramType program_number,const Uint1 * A,Int4 a_size,const Uint1 * B,Int4 b_size,BlastHSP * template_hsp,BlastHSPList * hsp_list,const BlastScoringParameters * score_params,const BlastHitSavingParameters * hit_params,BlastGapAlignStruct * gap_align,Int4 start_shift,Int4 cutoff)414 void SmithWatermanScoreWithTraceback(EBlastProgramType program_number,
415 const Uint1 *A, Int4 a_size,
416 const Uint1 *B, Int4 b_size,
417 BlastHSP *template_hsp,
418 BlastHSPList *hsp_list,
419 const BlastScoringParameters *score_params,
420 const BlastHitSavingParameters *hit_params,
421 BlastGapAlignStruct *gap_align,
422 Int4 start_shift, Int4 cutoff)
423 {
424 Int4 i, j;
425 Int4 *matrix_row;
426 Int4 **matrix;
427 Boolean swapped = FALSE;
428
429 Int4 best_score;
430 Int4 insert_score;
431 Int4 row_score;
432 Int4 row_path_score;
433 Int4 row_path_stop_i;
434 Int4 row_path_stop_j;
435 Int4 new_path_score;
436 Int4 new_path_stop_i;
437 Int4 new_path_stop_j;
438 BlastGapSW *scores;
439
440 Boolean is_pssm = gap_align->positionBased;
441 Int4 gap_open = score_params->gap_open;
442 Int4 gap_extend = score_params->gap_extend;
443 Int4 gap_open_extend = gap_open + gap_extend;
444
445 Uint1 *traceback_array;
446 Uint1 *traceback_row;
447 Uint1 script;
448
449 /* choose the score matrix */
450 if (is_pssm) {
451 matrix = gap_align->sbp->psi_matrix->pssm->data;
452 }
453 else {
454 /* for square score matrices, assume the matrix
455 is symmetric. This means that A and B can be
456 switched without changing the score, and this
457 saves memory if one sequence is large but the
458 other is not */
459 if (a_size < b_size) {
460 swapped = TRUE;
461 SWAP_SEQS(A, B);
462 SWAP_INT(a_size, b_size);
463 }
464 matrix = gap_align->sbp->matrix->data;
465 }
466
467 /* allocate space for scratch structures */
468 scores = (BlastGapSW *)calloc(b_size + 1, sizeof(BlastGapSW));
469 traceback_array = (Uint1 *)malloc((a_size + 1) * (b_size + 1) *
470 sizeof(Uint1));
471 traceback_row = traceback_array;
472 for (i = 0; i <= b_size; i++)
473 traceback_row[i] = EDIT_GAP_IN_A;
474 traceback_row += b_size + 1;
475
476 for (i = 1; i <= a_size; i++) {
477
478 if (is_pssm)
479 matrix_row = matrix[i-1];
480 else
481 matrix_row = matrix[A[i-1]];
482
483 insert_score = 0;
484 row_score = 0;
485 row_path_stop_i = 0;
486 row_path_stop_j = 0;
487 row_path_score = 0;
488 traceback_row[0] = EDIT_GAP_IN_B;
489
490 for (j = 1; j <= b_size; j++) {
491
492 /* score of best alignment ending at (i,j) with gap in B */
493 best_score = scores[j].best_gap - gap_extend;
494 script = 0;
495 if (scores[j].best - gap_open_extend > best_score) {
496 script |= EDIT_START_GAP_B;
497 best_score = scores[j].best - gap_open_extend;
498 }
499 scores[j].best_gap = best_score;
500
501 /* score of best alignment ending at (i,j) with gap in A */
502 best_score = insert_score - gap_extend;
503 if (row_score - gap_open_extend > best_score) {
504 script |= EDIT_START_GAP_A;
505 best_score = row_score - gap_open_extend;
506 }
507 insert_score = best_score;
508
509 /* Every cell computed by Smith-Waterman lies on exactly
510 one highest-scoring path. This means that at any given
511 time there are only b_size possible paths that need
512 bookkeeping information. In addition to the 3-way
513 maximum needed to choose the highest score, we also
514 need to choose the path that (i,j) extends. Begin by
515 assuming (i,j) extends the substitution path */
516 best_score = MAX(scores[j-1].best + matrix_row[B[j-1]], 0);
517 traceback_row[j] = script | EDIT_SUB;
518 new_path_score = scores[j-1].path_score;
519 new_path_stop_i = scores[j-1].path_stop_i;
520 new_path_stop_j = scores[j-1].path_stop_j;
521
522 /* switch to the insertion or deletion path, if one
523 of these has the highest overall score */
524 if (insert_score > best_score) {
525 best_score = insert_score;
526 traceback_row[j] = script | EDIT_GAP_IN_A;
527
528 new_path_score = row_path_score;
529 new_path_stop_i = row_path_stop_i;
530 new_path_stop_j = row_path_stop_j;
531 }
532 if (scores[j].best_gap >= best_score) {
533 best_score = scores[j].best_gap;
534 traceback_row[j] = script | EDIT_GAP_IN_B;
535
536 new_path_score = scores[j].path_score;
537 new_path_stop_i = scores[j].path_stop_i;
538 new_path_stop_j = scores[j].path_stop_j;
539 }
540
541 if (best_score == 0) {
542 /* the score of the path extended by (i,j) has
543 decayed to zero, meaning this path can never
544 be extended again. Before proceeding to the
545 next cell and forgetting about this path, check
546 whether the highest score previously achieved
547 by this path exceeds the cutoff for the overall
548 search, and if so then recover the alignment for
549 the current path right now */
550 if (new_path_score >= cutoff) {
551 s_GetTraceback(program_number, traceback_array,
552 A, B, b_size,
553 gap_open, gap_extend,
554 gap_align, new_path_stop_i,
555 new_path_stop_j, new_path_score,
556 hsp_list, swapped, template_hsp,
557 score_params->options,
558 hit_params->options, start_shift);
559 }
560 new_path_score = 0;
561 }
562
563 /* check if (i,j) is a new local maximum score
564 for this path */
565 if (best_score > new_path_score) {
566 new_path_score = best_score;
567 new_path_stop_i = i;
568 new_path_stop_j = j;
569 }
570
571 /* save path information */
572 scores[j-1].best = row_score;
573 scores[j-1].path_score = row_path_score;
574 scores[j-1].path_stop_i = row_path_stop_i;
575 scores[j-1].path_stop_j = row_path_stop_j;
576
577 row_score = best_score;
578 row_path_score = new_path_score;
579 row_path_stop_i = new_path_stop_i;
580 row_path_stop_j = new_path_stop_j;
581 }
582
583 /* save path information for the last cell in row i */
584 scores[j-1].best = row_score;
585 scores[j-1].path_score = row_path_score;
586 scores[j-1].path_stop_i = row_path_stop_i;
587 scores[j-1].path_stop_j = row_path_stop_j;
588
589 /* the last cell may not have decayed to zero, but the
590 best score of the path containing it can still exceed
591 the cutoff. Recover the alignment if this is the case;
592 it is the last chance to do so */
593 if (scores[j-1].path_score >= cutoff) {
594 s_GetTraceback(program_number, traceback_array,
595 A, B, b_size,
596 gap_open, gap_extend,
597 gap_align, scores[j-1].path_stop_i,
598 scores[j-1].path_stop_j,
599 scores[j-1].path_score,
600 hsp_list, swapped, template_hsp,
601 score_params->options,
602 hit_params->options, start_shift);
603 }
604 traceback_row += b_size + 1;
605 }
606
607 /* finally, check the last row again for paths that
608 have not decayed to zero */
609 for (i = 0; i < b_size; i++) {
610 if (scores[i].best && scores[i].path_score >= cutoff) {
611 s_GetTraceback(program_number, traceback_array,
612 A, B, b_size,
613 gap_open, gap_extend,
614 gap_align, scores[i].path_stop_i,
615 scores[i].path_stop_j,
616 scores[i].path_score,
617 hsp_list, swapped, template_hsp,
618 score_params->options,
619 hit_params->options, start_shift);
620 }
621 }
622
623 free(scores);
624 free(traceback_array);
625 }
626
627
628 /* See blast_sw.h for details */
BLAST_SmithWatermanGetGappedScore(EBlastProgramType program_number,BLAST_SequenceBlk * query,BlastQueryInfo * query_info,BLAST_SequenceBlk * subject,BlastGapAlignStruct * gap_align,const BlastScoringParameters * score_params,const BlastExtensionParameters * ext_params,const BlastHitSavingParameters * hit_params,BlastInitHitList * init_hitlist,BlastHSPList ** hsp_list_ptr,BlastGappedStats * gapped_stats,Boolean * fence_hit)629 Int2 BLAST_SmithWatermanGetGappedScore (EBlastProgramType program_number,
630 BLAST_SequenceBlk* query, BlastQueryInfo* query_info,
631 BLAST_SequenceBlk* subject,
632 BlastGapAlignStruct* gap_align,
633 const BlastScoringParameters* score_params,
634 const BlastExtensionParameters* ext_params,
635 const BlastHitSavingParameters* hit_params,
636 BlastInitHitList* init_hitlist,
637 BlastHSPList** hsp_list_ptr, BlastGappedStats* gapped_stats,
638 Boolean * fence_hit)
639 {
640 Boolean is_prot;
641 BlastHSPList* hsp_list = NULL;
642 const BlastHitSavingOptions* hit_options = hit_params->options;
643 Int4 cutoff_score = 0;
644 Int4 context;
645 Int4 **rpsblast_pssms = NULL; /* Pointer to concatenated PSSMs in
646 RPS-BLAST database */
647 const int kHspNumMax = BlastHspNumMax(TRUE, hit_options);
648
649 if (!query || !subject || !gap_align || !score_params || !ext_params ||
650 !hit_params || !init_hitlist || !hsp_list_ptr)
651 return 1;
652
653 is_prot = (program_number != eBlastTypeBlastn &&
654 program_number != eBlastTypePhiBlastn &&
655 program_number != eBlastTypeMapping);
656
657 if (Blast_ProgramIsRpsBlast(program_number)) {
658 Int4 rps_context = subject->oid;
659 rpsblast_pssms = gap_align->sbp->psi_matrix->pssm->data;
660 if (program_number == eBlastTypeRpsTblastn) {
661 rps_context = rps_context * NUM_FRAMES +
662 BLAST_FrameToContext(subject->frame, program_number);
663 }
664 /* only one cutoff applies to an RPS search */
665 cutoff_score = hit_params->cutoffs[rps_context].cutoff_score;
666 }
667
668 if (*hsp_list_ptr == NULL)
669 *hsp_list_ptr = hsp_list = Blast_HSPListNew(kHspNumMax);
670 else
671 hsp_list = *hsp_list_ptr;
672
673 /* ignore any initial ungapped alignments; just search
674 all contexts against the subject sequence, one context
675 at a time */
676 for (context = query_info->first_context;
677 context <= query_info->last_context; context++) {
678
679 BlastContextInfo *curr_ctx = query_info->contexts + context;
680 Int4 score;
681 BlastHSP* new_hsp;
682
683 if (!curr_ctx->is_valid)
684 continue;
685
686 if (rpsblast_pssms) {
687 gap_align->sbp->psi_matrix->pssm->data =
688 rpsblast_pssms + curr_ctx->query_offset;
689 }
690 else {
691 cutoff_score = hit_params->cutoffs[context].cutoff_score;
692 }
693
694 if (is_prot) {
695 score = s_SmithWatermanScoreOnly(
696 query->sequence + curr_ctx->query_offset,
697 curr_ctx->query_length,
698 subject->sequence,
699 subject->length,
700 score_params->gap_open,
701 score_params->gap_extend,
702 gap_align);
703 }
704 else {
705 score = s_NuclSmithWaterman(subject->sequence,
706 subject->length,
707 query->sequence + curr_ctx->query_offset,
708 curr_ctx->query_length,
709 score_params->gap_open,
710 score_params->gap_extend,
711 gap_align);
712 }
713
714 if (score >= cutoff_score) {
715 /* we know the score of the highest-scoring alignment but
716 not its boundaries. Use a single placeholder HSP to carry
717 score, context and frame information to the traceback phase */
718 Blast_HSPInit(0, curr_ctx->query_length, 0, subject->length, 0, 0,
719 context, curr_ctx->frame, subject->frame, score,
720 NULL, &new_hsp);
721 Blast_HSPListSaveHSP(hsp_list, new_hsp);
722 }
723 }
724
725 if (rpsblast_pssms)
726 gap_align->sbp->psi_matrix->pssm->data = rpsblast_pssms;
727
728 *hsp_list_ptr = hsp_list;
729 return 0;
730 }
731