1 /* $Id: blast_filter.c,v 1.102 2016/06/20 15:49:13 fukanchi Exp $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * thus cannot be copyrighted. This software/database is freely available
10 * to the public for use. The National Library of Medicine and the U.S.
11 * Government have not placed any restriction on its use or reproduction.
12 *
13 * Although all reasonable efforts have been taken to ensure the accuracy
14 * and reliability of the software and data, the NLM and the U.S.
15 * Government do not and cannot warrant the performance or results that
16 * may be obtained by using this software or data. The NLM and the U.S.
17 * Government disclaim all warranties, express or implied, including
18 * warranties of performance, merchantability or fitness for any particular
19 * purpose.
20 *
21 * Please cite the author in any work or product based on this material.
22 *
23 * ===========================================================================
24 *
25 */
26
27 /** @file blast_filter.c
28 * All code related to query sequence masking/filtering for BLAST
29 */
30
31 #include <algo/blast/core/blast_util.h>
32 #include <algo/blast/core/blast_filter.h>
33 #include <algo/blast/core/blast_seg.h>
34
35 /****************************************************************************/
36 /* Constants */
37 const Uint1 kNuclMask = 14; /* N in BLASTNA */
38 const Uint1 kProtMask = 21; /* X in NCBISTDAA */
39
40
41 /** Allowed length of the filtering options string. */
42 #define BLASTOPTIONS_BUFFER_SIZE 128
43
44
45 /** Copies filtering commands for one filtering algorithm from "instructions" to
46 * "buffer".
47 * ";" is a delimiter for the commands for different algorithms, so it stops
48 * copying when a ";" is found.
49 * Example filtering string: "m L; R -d rodents.lib"
50 * @param instructions filtering commands [in]
51 * @param buffer filled with filtering commands for one algorithm. [out]
52 */
53 static const char *
s_LoadOptionsToBuffer(const char * instructions,char * buffer)54 s_LoadOptionsToBuffer(const char *instructions, char* buffer)
55 {
56 Boolean not_started=TRUE;
57 char* buffer_ptr;
58 const char *ptr;
59 Int4 index;
60
61 ptr = instructions;
62 buffer_ptr = buffer;
63 for (index=0; index<BLASTOPTIONS_BUFFER_SIZE && *ptr != NULLB; index++)
64 {
65 if (*ptr == ';')
66 { /* ";" is a delimiter for different filtering algorithms. */
67 ptr++;
68 break;
69 }
70 /* Remove blanks at the beginning. */
71 if (not_started && *ptr == ' ')
72 {
73 ptr++;
74 }
75 else
76 {
77 not_started = FALSE;
78 *buffer_ptr = *ptr;
79 buffer_ptr++; ptr++;
80 }
81 }
82
83 *buffer_ptr = NULLB;
84
85 if (not_started == FALSE)
86 { /* Remove trailing blanks. */
87 buffer_ptr--;
88 while (*buffer_ptr == ' ' && buffer_ptr > buffer)
89 {
90 *buffer_ptr = NULLB;
91 buffer_ptr--;
92 }
93 }
94
95 return ptr;
96 }
97
98 /** Parses repeat filtering options string.
99 * @param repeat_options Input character string [in]
100 * @param dbname Database name for repeats filtering [out]
101 */
102 static Int2
s_ParseRepeatOptions(const char * repeat_options,char ** dbname)103 s_ParseRepeatOptions(const char* repeat_options, char** dbname)
104 {
105 char* ptr;
106
107 ASSERT(dbname);
108 *dbname = NULL;
109
110 if (!repeat_options)
111 return 0;
112
113 ptr = strstr(repeat_options, "-d");
114 if (ptr) {
115 ptr += 2;
116 while (*ptr == ' ' || *ptr == '\t')
117 ++ptr;
118 *dbname = strdup(ptr);
119 }
120 return 0;
121 }
122
123 /** Parses window masker options string.
124 * @param winmask_options Input character string [in]
125 * @param dbname Database name for window masker filtering [out]
126 * @param taxid Taxonomic ID for window masker filtering [out]
127 */
128 static Int2
s_ParseWindowMaskerOptions(const char * winmask_options,char ** dbname,int * taxid)129 s_ParseWindowMaskerOptions(const char * winmask_options,
130 char ** dbname,
131 int * taxid)
132 {
133 char* ptr = NULL;
134
135 ASSERT(dbname);
136 *dbname = NULL;
137
138 if (!winmask_options)
139 return 0;
140
141 ptr = strstr(winmask_options, "-d");
142
143 if (ptr) {
144 char * endp = 0;
145
146 ptr += 2;
147 while (*ptr == ' ' || *ptr == '\t')
148 ++ptr;
149
150 *dbname = strdup(ptr);
151
152 for(endp = *dbname; *endp; ++endp) {
153 if (*endp == ' ' || *endp == '\t') {
154 *endp = (char)0;
155 break;
156 }
157 }
158 } else {
159 ptr = strstr(winmask_options, "-t");
160
161 if (ptr) {
162 ptr += 2;
163 while (*ptr == ' ' || *ptr == '\t')
164 ++ptr;
165 *taxid = atoi(ptr);
166 }
167 }
168
169 return 0;
170 }
171
172 /** Parses options used for dust.
173 * @param ptr buffer containing instructions. [in]
174 * @param level sets level for dust. [out]
175 * @param window sets window for dust [out]
176 * @param linker sets linker for dust. [out]
177 */
178 static Int2
s_ParseDustOptions(const char * ptr,int * level,int * window,int * linker)179 s_ParseDustOptions(const char *ptr, int* level, int* window, int* linker)
180
181 {
182 char buffer[BLASTOPTIONS_BUFFER_SIZE];
183 int arg, index, index1, window_pri=-1, linker_pri=-1, level_pri=-1;
184
185 arg = 0;
186 index1 = 0;
187 for (index=0; index<BLASTOPTIONS_BUFFER_SIZE; index++)
188 {
189 if (*ptr == ' ' || *ptr == NULLB)
190 {
191 long tmplong;
192 buffer[index1] = NULLB;
193 index1 = 0;
194 switch(arg) {
195 case 0:
196 sscanf(buffer, "%ld", &tmplong);
197 level_pri = tmplong;
198 break;
199 case 1:
200 sscanf(buffer, "%ld", &tmplong);
201 window_pri = tmplong;
202 break;
203 case 2:
204 sscanf(buffer, "%ld", &tmplong);
205 linker_pri = tmplong;
206 break;
207 default:
208 break;
209 }
210
211 arg++;
212 while (*ptr == ' ')
213 ptr++;
214
215 /* end of the buffer. */
216 if (*ptr == NULLB)
217 break;
218 }
219 else
220 {
221 buffer[index1] = *ptr; ptr++;
222 index1++;
223 }
224 }
225 if (arg != 0 && arg != 3)
226 return 1;
227
228 *level = level_pri;
229 *window = window_pri;
230 *linker = linker_pri;
231
232 return 0;
233 }
234
235 /** parses a string to set three seg options.
236 * @param ptr buffer containing instructions [in]
237 * @param window returns "window" for seg algorithm. [out]
238 * @param locut returns "locut" for seg. [out]
239 * @param hicut returns "hicut" for seg. [out]
240 */
241 static Int2
s_ParseSegOptions(const char * ptr,Int4 * window,double * locut,double * hicut)242 s_ParseSegOptions(const char *ptr, Int4* window, double* locut, double* hicut)
243
244 {
245 char buffer[BLASTOPTIONS_BUFFER_SIZE];
246 Int4 arg, index, index1;
247
248 arg = 0;
249 index1 = 0;
250 for (index=0; index<BLASTOPTIONS_BUFFER_SIZE; index++)
251 {
252 if (*ptr == ' ' || *ptr == NULLB)
253 {
254 long tmplong;
255 double tmpdouble;
256 buffer[index1] = NULLB;
257 index1 = 0;
258 switch(arg) {
259 case 0:
260 sscanf(buffer, "%ld", &tmplong);
261 *window = tmplong;
262 break;
263 case 1:
264 sscanf(buffer, "%le", &tmpdouble);
265 *locut = tmpdouble;
266 break;
267 case 2:
268 sscanf(buffer, "%le", &tmpdouble);
269 *hicut = tmpdouble;
270 break;
271 default:
272 break;
273 }
274
275 arg++;
276 while (*ptr == ' ')
277 ptr++;
278
279 /* end of the buffer. */
280 if (*ptr == NULLB)
281 break;
282 }
283 else
284 {
285 buffer[index1] = *ptr; ptr++;
286 index1++;
287 }
288 }
289 if (arg != 0 && arg != 3)
290 return 1;
291
292 return 0;
293 }
294
295 /// Wrapper around strcat to ensure we don't do buffer overflows :)
296 /// @param dest string to concatenate to [in|out]
297 /// @param dest_size size of the dest array, modified if dest is grown [in|out]
298 /// @param string2append string to append to dest [in]
299 /// @return the concatenated string or NULL if we run out of memory
300 static char*
s_SafeStrCat(char ** dest,unsigned int * dest_size,const char * string2append)301 s_SafeStrCat(char** dest, unsigned int* dest_size, const char* string2append)
302 {
303 size_t dest_length = strlen(*dest);
304 size_t string2append_length = strlen(string2append);
305 if ((dest_length + string2append_length + 1) > *dest_size) {
306 size_t target_size = MAX(string2append_length, dest_length) * 2;
307 *dest = (char*)realloc((void*)*dest, target_size);
308 if (*dest) {
309 (*dest_size) = target_size;
310 } else {
311 sfree(*dest);
312 return 0;
313 }
314 }
315 strcat(*dest, string2append);
316 return *dest;
317 }
318
319 char*
BlastFilteringOptionsToString(const SBlastFilterOptions * filtering_options)320 BlastFilteringOptionsToString(const SBlastFilterOptions* filtering_options)
321 {
322 char* retval = NULL;
323 unsigned int retval_size = 0;
324
325 if (filtering_options == NULL) {
326 return strdup("F");
327 }
328
329 retval_size = 64; /* Usually this will suffice */
330 retval = (char*) calloc(retval_size, sizeof(char));
331
332 if (filtering_options->dustOptions) {
333 if (filtering_options->dustOptions->level == kDustLevel &&
334 filtering_options->dustOptions->window == kDustWindow &&
335 filtering_options->dustOptions->linker == kDustLinker) {
336 if (!s_SafeStrCat(&retval, &retval_size, "L;")) {
337 return 0;
338 }
339 } else {
340 char buffer[24] = { '\0' };
341 snprintf(buffer, sizeof(buffer), "D %d %d %d;",
342 filtering_options->dustOptions->level,
343 filtering_options->dustOptions->window,
344 filtering_options->dustOptions->linker);
345 if (!s_SafeStrCat(&retval, &retval_size, buffer)) {
346 return 0;
347 }
348 }
349 }
350
351 if (filtering_options->segOptions) {
352 if (filtering_options->segOptions->window == kSegWindow &&
353 filtering_options->segOptions->locut == kSegLocut &&
354 filtering_options->segOptions->hicut == kSegHicut) {
355 if (!s_SafeStrCat(&retval, &retval_size, "L;")) {
356 return 0;
357 }
358 } else {
359 char buffer[24] = { '\0' };
360 snprintf(buffer, sizeof(buffer), "S %d %1.1f %1.1f;",
361 filtering_options->segOptions->window,
362 filtering_options->segOptions->locut,
363 filtering_options->segOptions->hicut);
364 if (!s_SafeStrCat(&retval, &retval_size, buffer)) {
365 return 0;
366 }
367 }
368 }
369
370 if (filtering_options->repeatFilterOptions) {
371 if (filtering_options->repeatFilterOptions->database) {
372 if (!s_SafeStrCat(&retval, &retval_size, "R -d ")) {
373 return 0;
374 }
375 if (!s_SafeStrCat(&retval, &retval_size,
376 filtering_options->repeatFilterOptions->database)) {
377 return 0;
378 }
379 if (!s_SafeStrCat(&retval, &retval_size, ";")) {
380 return 0;
381 }
382 } else {
383 if (!s_SafeStrCat(&retval, &retval_size, "R;")) {
384 return 0;
385 }
386 }
387 }
388
389 if (filtering_options->windowMaskerOptions) {
390 if (filtering_options->windowMaskerOptions->taxid != 0) {
391 char buffer[24] = { '\0' };
392 snprintf(buffer, sizeof(buffer), "W -t %d;",
393 filtering_options->windowMaskerOptions->taxid);
394 if (!s_SafeStrCat(&retval, &retval_size, buffer)) {
395 return 0;
396 }
397 } else if (filtering_options->windowMaskerOptions->database) {
398 if (!s_SafeStrCat(&retval, &retval_size, "W -d ")) {
399 return 0;
400 }
401 if (!s_SafeStrCat(&retval, &retval_size,
402 filtering_options->windowMaskerOptions->database)) {
403 return 0;
404 }
405 if (!s_SafeStrCat(&retval, &retval_size, ";")) {
406 return 0;
407 }
408 }
409 }
410
411 /* Mask at hash is a modifier for other filtering options, as such it
412 * doesn't make sense to apply it by itself */
413 if (SBlastFilterOptionsMaskAtHash(filtering_options)) {
414 if (strlen(retval) != 0) {
415 /* Add mask at hash as a modifier for other filtering options */
416 if (!s_SafeStrCat(&retval, &retval_size, "m;")) {
417 return 0;
418 }
419 } else {
420 /* We still need to set "m" in a filter string (WB-391, WB-394) */
421 /* The string below can be modified into "mF" or "mL" or
422 whatever is decided to be the conventional meaning */
423 if (!s_SafeStrCat(&retval, &retval_size, "m;")) {
424 return 0;
425 }
426 }
427 }
428
429 return strlen(retval) == 0
430 ? s_SafeStrCat(&retval, &retval_size, "F")
431 : retval;
432 }
433
434 Int2
BlastFilteringOptionsFromString(EBlastProgramType program_number,const char * instructions,SBlastFilterOptions ** filtering_options,Blast_Message ** blast_message)435 BlastFilteringOptionsFromString(EBlastProgramType program_number,
436 const char* instructions,
437 SBlastFilterOptions* *filtering_options,
438 Blast_Message* *blast_message)
439 {
440 Boolean mask_at_hash = FALSE; /* the default. */
441 char* buffer;
442 const char* ptr = instructions;
443 char error_buffer[1024];
444 Int2 status = 0;
445 SSegOptions* segOptions = NULL;
446 SDustOptions* dustOptions = NULL;
447 SRepeatFilterOptions* repeatOptions = NULL;
448 SWindowMaskerOptions * winmaskOptions = NULL;
449
450 *filtering_options = NULL;
451 if (blast_message)
452 *blast_message = NULL;
453
454 if (instructions == NULL || strcasecmp(instructions, "F") == 0)
455 {
456 SBlastFilterOptionsNew(filtering_options, eEmpty);
457 return status;
458 }
459
460 buffer = (char*) calloc(strlen(instructions), sizeof(char));
461 /* allow old-style filters when m cannot be followed by the ';' */
462 if (ptr[0] == 'm' && ptr[1] == ' ')
463 {
464 mask_at_hash = TRUE;
465 ptr += 2;
466 }
467
468 while (*ptr != NULLB)
469 {
470 if (*ptr == 'S')
471 {
472 SSegOptionsNew(&segOptions);
473 ptr = s_LoadOptionsToBuffer(ptr+1, buffer);
474 if (buffer[0] != NULLB)
475 {
476 int window = 0;
477 double locut = .0, hicut = .0;
478 status = s_ParseSegOptions(buffer, &window, &locut, &hicut);
479 if (status)
480 {
481 segOptions = SSegOptionsFree(segOptions);
482 sprintf(error_buffer, "Error parsing filter string: %s", buffer);
483 if (blast_message)
484 Blast_MessageWrite(blast_message, eBlastSevError, kBlastMessageNoContext,
485 error_buffer);
486 sfree(buffer);
487 return status;
488 }
489 segOptions->window = window;
490 segOptions->locut = locut;
491 segOptions->hicut = hicut;
492 }
493 }
494 else if (*ptr == 'D')
495 {
496 SDustOptionsNew(&dustOptions);
497 ptr = s_LoadOptionsToBuffer(ptr+1, buffer);
498 if (buffer[0] != NULLB)
499 {
500 int window = 0, level = 0, linker = 0;
501 status = s_ParseDustOptions(buffer, &level, &window, &linker);
502 if (status)
503 {
504 dustOptions = SDustOptionsFree(dustOptions);
505 sprintf(error_buffer, "Error parsing filter string: %s", buffer);
506 if (blast_message)
507 Blast_MessageWrite(blast_message, eBlastSevError, kBlastMessageNoContext,
508 error_buffer);
509 sfree(buffer);
510 return status;
511 }
512 dustOptions->level = level;
513 dustOptions->window = window;
514 dustOptions->linker = linker;
515 }
516 }
517 else if (*ptr == 'R')
518 {
519 SRepeatFilterOptionsNew(&repeatOptions);
520 ptr = s_LoadOptionsToBuffer(ptr+1, buffer);
521 if (buffer[0] != NULLB)
522 {
523 char* dbname = NULL;
524 status = s_ParseRepeatOptions(buffer, &dbname);
525 if (status)
526 {
527 repeatOptions = SRepeatFilterOptionsFree(repeatOptions);
528 sprintf(error_buffer, "Error parsing filter string: %s", buffer);
529 if (blast_message)
530 Blast_MessageWrite(blast_message, eBlastSevError, kBlastMessageNoContext,
531 error_buffer);
532 sfree(buffer);
533 return status;
534 }
535 if (dbname)
536 {
537 sfree(repeatOptions->database);
538 repeatOptions->database = dbname;
539 }
540 }
541 }
542 else if (*ptr == 'W')
543 {
544 SWindowMaskerOptionsNew(&winmaskOptions);
545
546 ptr = s_LoadOptionsToBuffer(ptr+1, buffer);
547 if (buffer[0] != NULLB) {
548 char* dbname = NULL;
549 int taxid = 0;
550
551 status = s_ParseWindowMaskerOptions(buffer, &dbname, &taxid);
552 if (status) {
553 winmaskOptions = SWindowMaskerOptionsFree(winmaskOptions);
554 sprintf(error_buffer, "Error parsing filter string: %s", buffer);
555 if (blast_message)
556 Blast_MessageWrite(blast_message, eBlastSevError, kBlastMessageNoContext,
557 error_buffer);
558
559 sfree(buffer);
560 return status;
561 }
562 if (dbname) {
563 sfree(winmaskOptions->database);
564 winmaskOptions->database = dbname;
565 }
566 if (taxid) {
567 winmaskOptions->taxid = taxid;
568 }
569 }
570 }
571 else if (*ptr == 'L' || *ptr == 'T')
572 { /* do low-complexity filtering; dust for blastn, otherwise seg.*/
573 if (program_number == eBlastTypeBlastn
574 || program_number == eBlastTypeMapping)
575
576 SDustOptionsNew(&dustOptions);
577 else
578 SSegOptionsNew(&segOptions);
579 ptr++;
580 }
581 else if (*ptr == 'm')
582 {
583 mask_at_hash = TRUE;
584 ptr++;
585 }
586 else
587 { /* Nothing applied */
588 ptr++;
589 }
590 }
591 sfree(buffer);
592
593 status = SBlastFilterOptionsNew(filtering_options, eEmpty);
594 if (status)
595 return status;
596
597 (*filtering_options)->dustOptions = dustOptions;
598 (*filtering_options)->segOptions = segOptions;
599 (*filtering_options)->repeatFilterOptions = repeatOptions;
600 (*filtering_options)->windowMaskerOptions = winmaskOptions;
601 (*filtering_options)->mask_at_hash = mask_at_hash;
602
603 return status;
604 }
605
606
BlastSeqLocNew(BlastSeqLoc ** head,Int4 from,Int4 to)607 BlastSeqLoc* BlastSeqLocNew(BlastSeqLoc** head, Int4 from, Int4 to)
608 {
609 BlastSeqLoc* loc = (BlastSeqLoc*) calloc(1, sizeof(BlastSeqLoc));
610 if ( !loc ) {
611 return NULL;
612 }
613 loc->ssr = (SSeqRange*) calloc(1, sizeof(SSeqRange));
614 loc->ssr->left = from;
615 loc->ssr->right = to;
616
617 return BlastSeqLocAppend(head, loc);
618 }
619
BlastSeqLocAppend(BlastSeqLoc ** head,BlastSeqLoc * node)620 BlastSeqLoc* BlastSeqLocAppend(BlastSeqLoc** head, BlastSeqLoc* node)
621 {
622 if ( !node ) {
623 return NULL;
624 }
625
626 if (head)
627 {
628 if (*head)
629 {
630 BlastSeqLoc* tmp = *head;
631 while (tmp->next)
632 tmp = tmp->next;
633 tmp->next = node;
634 }
635 else
636 {
637 *head = node;
638 }
639 }
640
641 return node;
642 }
643
644 /** Makes a copy of the BlastSeqLoc and also a copy of the
645 * SSRange element. Does not copy BlastSeqLoc that is pointed
646 * to by "next".
647 * @param source the object to be copied [in]
648 * @return another BlastSeqLoc*
649 */
s_BlastSeqLocNodeDup(BlastSeqLoc * source)650 static BlastSeqLoc* s_BlastSeqLocNodeDup(BlastSeqLoc* source)
651 {
652 if ( !source ) {
653 return NULL;
654 }
655 ASSERT(source->ssr);
656 return BlastSeqLocNew(NULL, source->ssr->left, source->ssr->right);
657 }
658
659 /** Calculates number of links in a chain of BlastSeqLoc's.
660 * @param var Chain of BlastSeqLoc structures [in]
661 * @return Number of links in the chain.
662 */
s_BlastSeqLocLen(const BlastSeqLoc * var)663 static Int4 s_BlastSeqLocLen(const BlastSeqLoc* var)
664 {
665 BlastSeqLoc* itr = NULL;
666 Int4 retval = 0;
667
668 for (itr = (BlastSeqLoc*)var; itr; itr = itr->next, retval++) {
669 ;
670 }
671 return retval;
672 }
673
674 /** Converts a BlastSeqLoc list to an array of pointers, each pointing to an
675 * element of the list passed in to this function and the last element points
676 * to NULL
677 * @param list List to convert to an array of pointers [in]
678 * @param count number of elements populated in the array [out]
679 */
680 static BlastSeqLoc**
s_BlastSeqLocListToArrayOfPointers(const BlastSeqLoc * list,Int4 * count)681 s_BlastSeqLocListToArrayOfPointers(const BlastSeqLoc* list, Int4* count)
682 {
683 BlastSeqLoc* tmp,** retval;
684 Int4 i;
685 *count = 0;
686
687 if (list == NULL)
688 return NULL;
689
690 *count = s_BlastSeqLocLen(list);
691 retval = (BlastSeqLoc**) calloc(((size_t)(*count)+1), sizeof(BlastSeqLoc*));
692 for (tmp = (BlastSeqLoc*)list, i = 0; tmp != NULL && i < *count; i++) {
693 retval[i] = tmp;
694 tmp = tmp->next;
695 }
696 return retval;
697 }
698
699 /** Reverse elements in the list
700 * @param head pointer to pointer to the head of the list. [in|out]
701 * (this is not declared static so that it can be tested in the unit tests
702 */
703 NCBI_XBLAST_EXPORT
BlastSeqLocListReverse(BlastSeqLoc ** head)704 void BlastSeqLocListReverse(BlastSeqLoc** head)
705 {
706 BlastSeqLoc** ptrs = NULL; /* array of pointers to BlastSeqLoc elements */
707 Int4 num_elems = 0, i = 0;
708
709 if ( !head ) {
710 return;
711 }
712
713 ptrs = s_BlastSeqLocListToArrayOfPointers(*head, &num_elems);
714 if (num_elems == 0) {
715 return;
716 }
717 ASSERT(ptrs);
718 *head = ptrs[num_elems-1];
719 for (i = num_elems-1; i > 0; i--) {
720 ptrs[i]->next = ptrs[i-1];
721 }
722 ptrs[0]->next = NULL;
723 sfree(ptrs);
724 }
725
BlastSeqLocNodeFree(BlastSeqLoc * loc)726 BlastSeqLoc* BlastSeqLocNodeFree(BlastSeqLoc* loc)
727 {
728 if ( !loc ) {
729 return NULL;
730 }
731 sfree(loc->ssr);
732 sfree(loc);
733 return NULL;
734 }
735
BlastSeqLocFree(BlastSeqLoc * loc)736 BlastSeqLoc* BlastSeqLocFree(BlastSeqLoc* loc)
737 {
738 while (loc) {
739 BlastSeqLoc* next_loc = loc->next;
740 loc = BlastSeqLocNodeFree(loc);
741 loc = next_loc;
742 }
743 return NULL;
744 }
745
BlastSeqLocListDup(BlastSeqLoc * head)746 BlastSeqLoc* BlastSeqLocListDup(BlastSeqLoc* head)
747 {
748 BlastSeqLoc* retval = NULL;
749 BlastSeqLoc* retval_tail = NULL;
750
751 for (; head; head = head->next) {
752 retval_tail = BlastSeqLocAppend(retval_tail ? &retval_tail : &retval,
753 s_BlastSeqLocNodeDup(head));
754 }
755
756 return retval;
757 }
758
BlastMaskLocNew(Int4 total)759 BlastMaskLoc* BlastMaskLocNew(Int4 total)
760 {
761 BlastMaskLoc* retval = (BlastMaskLoc *) calloc(1, sizeof(BlastMaskLoc));
762 retval->total_size = total;
763 if (total > 0)
764 retval->seqloc_array = (BlastSeqLoc **) calloc(total,
765 sizeof(BlastSeqLoc *));
766 return retval;
767 }
768
BlastMaskLocDup(const BlastMaskLoc * mask_loc)769 BlastMaskLoc* BlastMaskLocDup(const BlastMaskLoc* mask_loc)
770 {
771 BlastMaskLoc* retval = NULL;
772 Int4 index = 0;
773
774 if ( !mask_loc ) {
775 return NULL;
776 }
777
778 retval = BlastMaskLocNew(mask_loc->total_size);
779
780 for (index = 0; index < mask_loc->total_size; index++) {
781 retval->seqloc_array[index] =
782 BlastSeqLocListDup(mask_loc->seqloc_array[index]);
783 }
784
785 return retval;
786 }
787
BlastMaskLocFree(BlastMaskLoc * mask_loc)788 BlastMaskLoc* BlastMaskLocFree(BlastMaskLoc* mask_loc)
789 {
790 Int4 index;
791
792 if (mask_loc == NULL)
793 return NULL;
794
795 for (index=0; index<mask_loc->total_size; index++)
796 {
797 if (mask_loc->seqloc_array != NULL)
798 BlastSeqLocFree(mask_loc->seqloc_array[index]);
799 }
800 sfree(mask_loc->seqloc_array);
801 sfree(mask_loc);
802 return NULL;
803 }
804
BlastMaskLocDNAToProtein(BlastMaskLoc * mask_loc,const BlastQueryInfo * query_info)805 Int2 BlastMaskLocDNAToProtein(BlastMaskLoc* mask_loc,
806 const BlastQueryInfo* query_info)
807 {
808 Uint4 seq_index;
809 BlastSeqLoc* dna_seqlocs[NUM_FRAMES];
810
811 if (!mask_loc)
812 return 0;
813
814 /* Check that the array size in BlastMaskLoc corresponds to the number
815 of contexts in BlastQueryInfo. */
816 ASSERT(mask_loc->total_size == query_info->last_context + 1);
817
818 /* Loop over multiple DNA sequences */
819 for (seq_index = 0; seq_index < (Uint4)query_info->num_queries;
820 ++seq_index) {
821 const Uint4 ctx_idx = NUM_FRAMES * seq_index;
822 const Int4 dna_length = BlastQueryInfoGetQueryLength(query_info,
823 eBlastTypeBlastx,
824 seq_index);
825 Int4 context;
826
827 /* Save the DNA masking locations, as they'll be freed and overwritten
828 * by their translations */
829 memset((void*) &dna_seqlocs, 0, sizeof(dna_seqlocs));
830 memcpy((void*) &dna_seqlocs,
831 (void*) &mask_loc->seqloc_array[ctx_idx],
832 sizeof(dna_seqlocs));
833 memset((void*) &mask_loc->seqloc_array[ctx_idx], 0, sizeof(dna_seqlocs));
834
835 /* Reproduce this mask for all 6 frames, with translated coordinates */
836 for (context = 0; context < NUM_FRAMES; ++context) {
837 const Int2 frame = BLAST_ContextToFrame(eBlastTypeBlastx, context);
838 BlastSeqLoc* frame_seqloc = dna_seqlocs[context];
839 BlastSeqLoc* prot_tail = NULL;
840 BlastSeqLoc* itr = NULL;
841
842 /* If no masks were provided for some frames, use the first one */
843 if (frame_seqloc == NULL && dna_seqlocs[0]) {
844 frame_seqloc = dna_seqlocs[0];
845 }
846 for (itr = frame_seqloc; itr; itr = itr->next) {
847 Int4 from, to;
848 SSeqRange* seq_range = itr->ssr;
849 /* masks should be 0-offset */
850 ASSERT(seq_range->right < dna_length);
851 ASSERT(seq_range->left >= 0);
852 if (frame < 0) {
853 from = (dna_length + frame - seq_range->right)/CODON_LENGTH;
854 to = (dna_length + frame - seq_range->left)/CODON_LENGTH;
855 } else {
856 from = (seq_range->left - frame + 1)/CODON_LENGTH;
857 to = (seq_range->right - frame + 1)/CODON_LENGTH;
858 }
859
860 if (from < 0)
861 from = 0;
862 if (to < 0)
863 to = 0;
864 if (from >= query_info->contexts[ctx_idx+context].query_length)
865 from = query_info->contexts[ctx_idx+context].query_length - 1;
866 if (to >= query_info->contexts[ctx_idx+context].query_length)
867 to = query_info->contexts[ctx_idx+context].query_length - 1;
868
869 ASSERT(from >= 0);
870 ASSERT(to >= 0);
871 ASSERT(from < query_info->contexts[ctx_idx+context].query_length);
872 ASSERT(to < query_info->contexts[ctx_idx+context].query_length);
873
874 /* Cache the tail of the list to avoid the overhead of
875 * traversing the list when appending to it */
876 prot_tail = BlastSeqLocNew((prot_tail
877 ? & prot_tail
878 : & mask_loc->seqloc_array[ctx_idx+context]),
879 from, to);
880 }
881 }
882 for (context = 0; context < NUM_FRAMES; ++context) {
883 BlastSeqLocFree(dna_seqlocs[context]);
884 }
885 }
886
887 return 0;
888 }
889
890
BlastMaskLocProteinToDNA(BlastMaskLoc * mask_loc,const BlastQueryInfo * query_info)891 Int2 BlastMaskLocProteinToDNA(BlastMaskLoc* mask_loc,
892 const BlastQueryInfo* query_info)
893 {
894 Int2 status = 0;
895 Int4 index;
896
897 /* If there is not mask, there is nothing to convert to DNA coordinates,
898 hence just return. */
899 if (!mask_loc)
900 return 0;
901
902 /* Check that the array size in BlastMaskLoc corresponds to the number
903 of contexts in BlastQueryInfo. */
904 ASSERT(mask_loc->total_size == query_info->last_context + 1);
905
906 /* Loop over all DNA sequences */
907 for (index=0; index < query_info->num_queries; ++index)
908 {
909 Int4 frame_start = index*NUM_FRAMES;
910 Int4 frame_index;
911 Int4 dna_length = BlastQueryInfoGetQueryLength(query_info,
912 eBlastTypeBlastx,
913 index);
914 /* Loop over all frames of one DNA sequence */
915 for (frame_index=frame_start; frame_index<(frame_start+NUM_FRAMES);
916 frame_index++) {
917 BlastSeqLoc* loc;
918 Int2 frame =
919 BLAST_ContextToFrame(eBlastTypeBlastx, frame_index % NUM_FRAMES);
920 /* Loop over all mask locations for a given frame */
921 for (loc = mask_loc->seqloc_array[frame_index]; loc; loc = loc->next) {
922 Int4 from=0, to=0;
923 SSeqRange* seq_range = loc->ssr;
924 if (frame < 0) {
925 to = dna_length - CODON_LENGTH*seq_range->left + frame;
926 from = dna_length - CODON_LENGTH*seq_range->right + frame + 1;
927 } else {
928 from = CODON_LENGTH*seq_range->left + frame - 1;
929 to = CODON_LENGTH*seq_range->right + frame - 1;
930 }
931
932 if (from < 0)
933 from = 0;
934 if (to < 0)
935 to = 0;
936 if (from >= dna_length)
937 from = dna_length - 1;
938 if (to >= dna_length)
939 to = dna_length - 1;
940
941 ASSERT(from >= 0);
942 ASSERT(to >= 0);
943 ASSERT(from < dna_length);
944 ASSERT(to < dna_length);
945
946 seq_range->left = from;
947 seq_range->right = to;
948 }
949 }
950 }
951 return status;
952 }
953
954 /** Used for qsort, compares two SeqLoc's by starting position. */
s_SeqRangeSortByStartPosition(const void * vp1,const void * vp2)955 static int s_SeqRangeSortByStartPosition(const void *vp1, const void *vp2)
956 {
957 BlastSeqLoc* v1 = *((BlastSeqLoc**) vp1);
958 BlastSeqLoc* v2 = *((BlastSeqLoc**) vp2);
959 SSeqRange* loc1 = (SSeqRange*) v1->ssr;
960 SSeqRange* loc2 = (SSeqRange*) v2->ssr;
961
962 if (loc1->left < loc2->left)
963 return -1;
964 else if (loc1->left > loc2->left)
965 return 1;
966 else
967 return 0;
968 }
969
970 void
BlastSeqLocCombine(BlastSeqLoc ** mask_loc,Int4 link_value)971 BlastSeqLocCombine(BlastSeqLoc** mask_loc, Int4 link_value)
972 {
973 BlastSeqLoc** ptrs = NULL;
974 Int4 i = 0, num_elems = 0;
975
976 /* Break up the list into an array of pointers and sort it */
977 ptrs = s_BlastSeqLocListToArrayOfPointers(*mask_loc, &num_elems);
978 if (num_elems == 0) {
979 return;
980 }
981 ASSERT(ptrs);
982 qsort(ptrs, (size_t)num_elems, sizeof(*ptrs),
983 s_SeqRangeSortByStartPosition);
984
985 /* Merge the overlapping elements */
986 {
987 BlastSeqLoc* curr_tail = *mask_loc = ptrs[0];
988 for (i = 0; i < num_elems - 1; i++) {
989 const SSeqRange* next_ssr = ptrs[i+1]->ssr;
990 const Int4 stop = curr_tail->ssr->right;
991
992 if ((stop + link_value) > next_ssr->left) {
993 curr_tail->ssr->right = MAX(stop, next_ssr->right);
994 ptrs[i+1] = BlastSeqLocNodeFree(ptrs[i+1]);
995 } else {
996 curr_tail = ptrs[i+1];
997 }
998 }
999 }
1000
1001 /* Rebuild the linked list */
1002 {
1003 BlastSeqLoc* tail = *mask_loc;
1004 for (i = 1; i < num_elems; i++) {
1005 if (ptrs[i]) {
1006 tail->next = ptrs[i];
1007 tail = ptrs[i];
1008 }
1009 }
1010 tail->next = NULL;
1011 }
1012 sfree(ptrs);
1013 }
1014
1015 Int2
BLAST_ComplementMaskLocations(EBlastProgramType program_number,const BlastQueryInfo * query_info,const BlastMaskLoc * mask_loc,BlastSeqLoc ** complement_mask)1016 BLAST_ComplementMaskLocations(EBlastProgramType program_number,
1017 const BlastQueryInfo* query_info,
1018 const BlastMaskLoc* mask_loc, BlastSeqLoc* *complement_mask)
1019 {
1020 Int4 context;
1021 const Boolean kIsNucl = (program_number == eBlastTypeBlastn ||
1022 program_number == eBlastTypeMapping);
1023 BlastSeqLoc* tail = NULL; /* Pointer to the tail of the complement_mask
1024 linked list */
1025
1026 if (complement_mask == NULL)
1027 return -1;
1028
1029 *complement_mask = NULL;
1030
1031 for (context = query_info->first_context;
1032 context <= query_info->last_context; ++context) {
1033
1034 Boolean first = TRUE; /* Specifies beginning of query. */
1035 Boolean last_interval_open=TRUE; /* if TRUE last interval needs to be closed. */
1036 Int4 start_offset, end_offset, filter_start, filter_end;
1037 Int4 left=0, right; /* Used for left/right extent of a region. */
1038 BlastSeqLoc* loc = NULL;
1039
1040 if (query_info->contexts[context].is_valid == FALSE) {
1041 continue;
1042 }
1043
1044 start_offset = query_info->contexts[context].query_offset;
1045 end_offset = query_info->contexts[context].query_length
1046 + start_offset - 1;
1047 ASSERT(start_offset <= end_offset);
1048
1049 /* mask_loc NULL is simply the case that NULL was passed in, which we
1050 take to mean that nothing on query is masked. */
1051 if (mask_loc == NULL || mask_loc->seqloc_array[context] == NULL) {
1052 /* Cache the tail of the list to avoid the overhead of traversing the
1053 * list when appending to it */
1054 tail = BlastSeqLocNew(tail ? &tail : complement_mask,
1055 start_offset, end_offset);
1056 continue;
1057 }
1058
1059 if (BlastIsReverseStrand(kIsNucl, context)) {
1060 BlastSeqLocListReverse(&mask_loc->seqloc_array[context]);
1061 }
1062 loc = mask_loc->seqloc_array[context];
1063
1064 first = TRUE;
1065 for ( ; loc; loc = loc->next) {
1066 SSeqRange* seq_range = loc->ssr;
1067 if (BlastIsReverseStrand(kIsNucl, context)) {
1068 filter_start = end_offset - seq_range->right;
1069 filter_end = end_offset - seq_range->left;
1070 } else {
1071 filter_start = start_offset + seq_range->left;
1072 filter_end = start_offset + seq_range->right;
1073 }
1074 /* The canonical "state" at the top of this
1075 while loop is that both "left" and "right" have
1076 been initialized to their correct values.
1077 The first time this loop is entered in a call to
1078 the function this is not true and the following "if"
1079 statement moves everything to the canonical state. */
1080 if (first) {
1081 last_interval_open = TRUE;
1082 first = FALSE;
1083
1084 if (filter_start > start_offset) {
1085 /* beginning of sequence not filtered */
1086 left = start_offset;
1087 } else {
1088 /* beginning of sequence filtered */
1089 left = filter_end + 1;
1090 continue;
1091 }
1092 }
1093
1094 right = filter_start - 1;
1095
1096 /* Cache the tail of the list to avoid the overhead of traversing the
1097 * list when appending to it */
1098 tail = BlastSeqLocNew((tail ? &tail : complement_mask), left, right);
1099 if (filter_end >= end_offset) {
1100 /* last masked region at end of sequence */
1101 last_interval_open = FALSE;
1102 break;
1103 } else {
1104 left = filter_end + 1;
1105 }
1106 }
1107
1108 if (last_interval_open) {
1109 /* Need to finish SSeqRange* for last interval. */
1110 right = end_offset;
1111 /* Cache the tail of the list to avoid the overhead of traversing the
1112 * list when appending to it */
1113 tail = BlastSeqLocNew((tail ? &tail : complement_mask), left, right);
1114 }
1115 }
1116 return 0;
1117 }
1118
1119
1120 Int2
BlastSetUp_Filter(EBlastProgramType program_number,Uint1 * sequence,Int4 length,Int4 offset,const SBlastFilterOptions * filter_options,BlastSeqLoc ** seqloc_retval,Blast_Message ** blast_message)1121 BlastSetUp_Filter(EBlastProgramType program_number,
1122 Uint1* sequence,
1123 Int4 length,
1124 Int4 offset,
1125 const SBlastFilterOptions* filter_options,
1126 BlastSeqLoc** seqloc_retval,
1127 Blast_Message* *blast_message)
1128 {
1129 Int2 status=0; /* return value. */
1130
1131 ASSERT(filter_options);
1132 ASSERT(seqloc_retval);
1133
1134 *seqloc_retval = NULL;
1135
1136 status = SBlastFilterOptionsValidate(program_number, filter_options,
1137 blast_message);
1138 if (status)
1139 return status;
1140
1141 if (filter_options->segOptions)
1142 {
1143 SSegOptions* seg_options = filter_options->segOptions;
1144 SegParameters* sparamsp=NULL;
1145
1146 sparamsp = SegParametersNewAa();
1147 sparamsp->overlaps = TRUE;
1148 if (seg_options->window > 0)
1149 sparamsp->window = seg_options->window;
1150 if (seg_options->locut > 0.0)
1151 sparamsp->locut = seg_options->locut;
1152 if (seg_options->hicut > 0.0)
1153 sparamsp->hicut = seg_options->hicut;
1154
1155 status = SeqBufferSeg(sequence, length, offset, sparamsp,
1156 seqloc_retval);
1157 SegParametersFree(sparamsp);
1158 sparamsp = NULL;
1159 }
1160
1161 return status;
1162 }
1163
1164 void
BlastSeqLocReverse(BlastSeqLoc * masks,Int4 query_length)1165 BlastSeqLocReverse(BlastSeqLoc* masks, Int4 query_length)
1166 {
1167 for(; masks; masks = masks->next) {
1168 masks->ssr->left = query_length - 1 - masks->ssr->right;
1169 masks->ssr->right = query_length - 1 - masks->ssr->left;
1170 }
1171 }
1172
1173 /** Calculates the mask locations one context at a time.
1174 * @param query_blk sequence [in]
1175 * @param query_info information about sequences [in]
1176 * @param context which context is this? [in]
1177 * @param program_number program (blastn, blastp, etc.) [in]
1178 * @param filter_options instructions for producing mask [in]
1179 * @param filter_out results of filtering operations [out]
1180 * @param blast_message any error or warning messages [out]
1181 * @return zero on success
1182 */
1183 static Int2
s_GetFilteringLocationsForOneContext(BLAST_SequenceBlk * query_blk,const BlastQueryInfo * query_info,Int4 context,EBlastProgramType program_number,const SBlastFilterOptions * filter_options,BlastSeqLoc ** filter_out,Blast_Message ** blast_message)1184 s_GetFilteringLocationsForOneContext(BLAST_SequenceBlk* query_blk,
1185 const BlastQueryInfo* query_info,
1186 Int4 context,
1187 EBlastProgramType program_number,
1188 const SBlastFilterOptions* filter_options,
1189 BlastSeqLoc* *filter_out,
1190 Blast_Message* *blast_message)
1191 {
1192 Int2 status = 0;
1193 Int4 query_length = 0; /* Length of query described by SeqLocPtr. */
1194 Int4 context_offset;
1195 Uint1 *buffer; /* holds sequence for plus strand or protein. */
1196
1197 const Boolean kIsNucl = (program_number == eBlastTypeBlastn ||
1198 program_number == eBlastTypeMapping);
1199
1200 context_offset = query_info->contexts[context].query_offset;
1201 buffer = &query_blk->sequence[context_offset];
1202
1203 if (query_info->contexts[context].is_valid == FALSE) {
1204 return 0;
1205 }
1206
1207 query_length = query_info->contexts[context].query_length;
1208
1209 status = BlastSetUp_Filter(program_number,
1210 buffer,
1211 query_length,
1212 0,
1213 filter_options,
1214 filter_out,
1215 blast_message);
1216 if (status)
1217 return status;
1218
1219 if (BlastIsReverseStrand(kIsNucl, context) == TRUE) {
1220 /* Reverse this as it's on minus strand. */
1221 BlastSeqLocReverse(*filter_out, query_length);
1222 }
1223
1224 /* Extract the mask locations corresponding to this query
1225 (frame, strand), detach it from other masks.
1226 NB: for translated search the mask locations are expected in
1227 protein coordinates. The nucleotide locations must be converted
1228 to protein coordinates prior to the call to BLAST_MainSetUp.
1229 */
1230 {
1231 /* Auxiliary locations for lower-case masking or any other masking
1232 * which occurred outside of CORE BLAST */
1233 BlastSeqLoc *lcase_mask_slp = NULL;
1234 if (query_blk->lcase_mask && query_blk->lcase_mask->seqloc_array)
1235 {
1236 ASSERT(context < query_blk->lcase_mask->total_size);
1237 lcase_mask_slp = query_blk->lcase_mask->seqloc_array[context];
1238 /* Set location list to NULL, to allow safe memory deallocation,
1239 ownership transferred to filter_out below. */
1240 query_blk->lcase_mask->seqloc_array[context] = NULL;
1241 }
1242
1243 /* Attach the lower case mask locations to the filter locations and
1244 combine them */
1245 BlastSeqLocAppend(filter_out, lcase_mask_slp);
1246 }
1247
1248 BlastSeqLocCombine(filter_out, 0);
1249
1250 return 0;
1251 }
1252
1253 Int2
BlastSetUp_GetFilteringLocations(BLAST_SequenceBlk * query_blk,const BlastQueryInfo * query_info,EBlastProgramType program_number,const SBlastFilterOptions * filter_options,BlastMaskLoc ** filter_maskloc,Blast_Message ** blast_message)1254 BlastSetUp_GetFilteringLocations(BLAST_SequenceBlk* query_blk,
1255 const BlastQueryInfo* query_info,
1256 EBlastProgramType program_number,
1257 const SBlastFilterOptions* filter_options,
1258 BlastMaskLoc** filter_maskloc,
1259 Blast_Message** blast_message)
1260 {
1261 Int2 status = 0;
1262 Int4 context = 0; /* loop variable. */
1263 const int kNumContexts = query_info->last_context + 1;
1264
1265 ASSERT(query_info && query_blk && filter_maskloc);
1266
1267 ASSERT(blast_message);
1268 ASSERT(kNumContexts ==
1269 query_info->num_queries*BLAST_GetNumberOfContexts(program_number));
1270 *filter_maskloc = BlastMaskLocNew(kNumContexts);
1271
1272 for (context = query_info->first_context;
1273 context <= query_info->last_context; ++context) {
1274
1275 BlastSeqLoc *filter_per_context = NULL;
1276 status = s_GetFilteringLocationsForOneContext(query_blk,
1277 query_info,
1278 context,
1279 program_number,
1280 filter_options,
1281 &filter_per_context,
1282 blast_message);
1283 if (status) {
1284 Blast_MessageWrite(blast_message, eBlastSevError, context,
1285 "Failure at filtering");
1286 return status;
1287 }
1288
1289 /* NB: for translated searches filter locations are returned in
1290 protein coordinates, because the DNA lengths of sequences are
1291 not available here. The caller must take care of converting
1292 them back to nucleotide coordinates. */
1293 (*filter_maskloc)->seqloc_array[context] = filter_per_context;
1294 }
1295 return 0;
1296 }
1297
1298 void
Blast_MaskTheResidues(Uint1 * buffer,Int4 length,Boolean is_na,const BlastSeqLoc * mask_loc,Boolean reverse,Int4 offset)1299 Blast_MaskTheResidues(Uint1 * buffer, Int4 length, Boolean is_na,
1300 const BlastSeqLoc* mask_loc, Boolean reverse, Int4 offset)
1301 {
1302 const Uint1 kMaskingLetter = is_na ? kNuclMask : kProtMask;
1303 ASSERT(buffer);
1304 for (; mask_loc; mask_loc = mask_loc->next) {
1305
1306 Int4 index, start, stop;
1307
1308 if (reverse) {
1309 start = length - 1 - mask_loc->ssr->right;
1310 stop = length - 1 - mask_loc->ssr->left;
1311 } else {
1312 start = mask_loc->ssr->left;
1313 stop = mask_loc->ssr->right;
1314 }
1315
1316 start -= offset;
1317 stop -= offset;
1318
1319 ASSERT(start < length);
1320 ASSERT(stop <= length);
1321
1322 for (index = start; index <= stop; index++)
1323 buffer[index] = kMaskingLetter;
1324 }
1325 }
1326
1327 void
Blast_MaskUnsupportedAA(BLAST_SequenceBlk * seq,Uint1 min_invalid)1328 Blast_MaskUnsupportedAA(BLAST_SequenceBlk* seq, Uint1 min_invalid)
1329 {
1330 Uint1 *sequence = seq->sequence;
1331 Int4 length = seq->length;
1332 Int4 i;
1333
1334 for (i = 0; i < length; i++) {
1335 if (sequence[i] >= min_invalid) {
1336 sequence[i] = kProtMask;
1337 }
1338 }
1339 }
1340
1341 void
BlastSetUp_MaskQuery(BLAST_SequenceBlk * query_blk,const BlastQueryInfo * query_info,const BlastMaskLoc * filter_maskloc,EBlastProgramType program_number)1342 BlastSetUp_MaskQuery(BLAST_SequenceBlk* query_blk,
1343 const BlastQueryInfo* query_info,
1344 const BlastMaskLoc *filter_maskloc,
1345 EBlastProgramType program_number)
1346 {
1347 const Boolean kIsNucl = (program_number == eBlastTypeBlastn
1348 || program_number == eBlastTypeMapping);
1349 Int4 context; /* loop variable. */
1350 Int4 total_length;
1351 Boolean has_mask = FALSE; /* Check for whether filter_maskloc is empty. */
1352 Int4 index; /* loop variable. */
1353
1354 ASSERT(query_blk);
1355 ASSERT(query_info);
1356 ASSERT(filter_maskloc);
1357
1358
1359 for (index=0; index<filter_maskloc->total_size; index++)
1360 {
1361 if (filter_maskloc->seqloc_array[index])
1362 {
1363 has_mask = TRUE;
1364 break;
1365 }
1366 }
1367 if (has_mask == FALSE)
1368 return;
1369
1370
1371 total_length = query_info->contexts[query_info->last_context].query_offset
1372 + query_info->contexts[query_info->last_context].query_length + 2;
1373 query_blk->sequence_start_nomask = BlastMemDup(query_blk->sequence_start, total_length);
1374 query_blk->sequence_nomask = query_blk->sequence_start_nomask + 1;
1375 query_blk->nomask_allocated = TRUE;
1376
1377 for (context = query_info->first_context;
1378 context <= query_info->last_context; ++context) {
1379
1380 Int4 query_length = 0;
1381 Int4 context_offset = 0;
1382 Uint1 *buffer = NULL; /* holds sequence */
1383
1384 if (query_info->contexts[context].is_valid == FALSE) {
1385 continue;
1386 }
1387
1388 query_length = query_info->contexts[context].query_length;
1389
1390 context_offset = query_info->contexts[context].query_offset;
1391 buffer = &query_blk->sequence[context_offset];
1392 ASSERT(buffer);
1393
1394 Blast_MaskTheResidues(buffer, query_length, kIsNucl,
1395 filter_maskloc->seqloc_array[context],
1396 BlastIsReverseStrand(kIsNucl, context), 0);
1397 }
1398 }
1399