1 /* $Id: blast_filter.c 533522 2017-04-17 15:50:34Z boratyng $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * thus cannot be copyrighted. This software/database is freely available
10 * to the public for use. The National Library of Medicine and the U.S.
11 * Government have not placed any restriction on its use or reproduction.
12 *
13 * Although all reasonable efforts have been taken to ensure the accuracy
14 * and reliability of the software and data, the NLM and the U.S.
15 * Government do not and cannot warrant the performance or results that
16 * may be obtained by using this software or data. The NLM and the U.S.
17 * Government disclaim all warranties, express or implied, including
18 * warranties of performance, merchantability or fitness for any particular
19 * purpose.
20 *
21 * Please cite the author in any work or product based on this material.
22 *
23 * ===========================================================================
24 *
25 */
26
27 /** @file blast_filter.c
28 * All code related to query sequence masking/filtering for BLAST
29 */
30
31 #include <algo/blast/core/blast_util.h>
32 #include <algo/blast/core/blast_filter.h>
33 #include <algo/blast/core/blast_seg.h>
34 #include "jumper.h"
35
36 /****************************************************************************/
37 /* Constants */
38 const Uint1 kNuclMask = 14; /* N in BLASTNA */
39 const Uint1 kProtMask = 21; /* X in NCBISTDAA */
40
41
42 /** Allowed length of the filtering options string. */
43 #define BLASTOPTIONS_BUFFER_SIZE 128
44
45
46 /** Copies filtering commands for one filtering algorithm from "instructions" to
47 * "buffer".
48 * ";" is a delimiter for the commands for different algorithms, so it stops
49 * copying when a ";" is found.
50 * Example filtering string: "m L; R -d rodents.lib"
51 * @param instructions filtering commands [in]
52 * @param buffer filled with filtering commands for one algorithm. [out]
53 */
54 static const char *
s_LoadOptionsToBuffer(const char * instructions,char * buffer)55 s_LoadOptionsToBuffer(const char *instructions, char* buffer)
56 {
57 Boolean not_started=TRUE;
58 char* buffer_ptr;
59 const char *ptr;
60 Int4 index;
61
62 ptr = instructions;
63 buffer_ptr = buffer;
64 for (index=0; index<BLASTOPTIONS_BUFFER_SIZE && *ptr != NULLB; index++)
65 {
66 if (*ptr == ';')
67 { /* ";" is a delimiter for different filtering algorithms. */
68 ptr++;
69 break;
70 }
71 /* Remove blanks at the beginning. */
72 if (not_started && *ptr == ' ')
73 {
74 ptr++;
75 }
76 else
77 {
78 not_started = FALSE;
79 *buffer_ptr = *ptr;
80 buffer_ptr++; ptr++;
81 }
82 }
83
84 *buffer_ptr = NULLB;
85
86 if (not_started == FALSE)
87 { /* Remove trailing blanks. */
88 buffer_ptr--;
89 while (*buffer_ptr == ' ' && buffer_ptr > buffer)
90 {
91 *buffer_ptr = NULLB;
92 buffer_ptr--;
93 }
94 }
95
96 return ptr;
97 }
98
99 /** Parses repeat filtering options string.
100 * @param repeat_options Input character string [in]
101 * @param dbname Database name for repeats filtering [out]
102 */
103 static Int2
s_ParseRepeatOptions(const char * repeat_options,char ** dbname)104 s_ParseRepeatOptions(const char* repeat_options, char** dbname)
105 {
106 char* ptr;
107
108 ASSERT(dbname);
109 *dbname = NULL;
110
111 if (!repeat_options)
112 return 0;
113
114 ptr = strstr(repeat_options, "-d");
115 if (ptr) {
116 ptr += 2;
117 while (*ptr == ' ' || *ptr == '\t')
118 ++ptr;
119 *dbname = strdup(ptr);
120 }
121 return 0;
122 }
123
124 /** Parses window masker options string.
125 * @param winmask_options Input character string [in]
126 * @param dbname Database name for window masker filtering [out]
127 * @param taxid Taxonomic ID for window masker filtering [out]
128 */
129 static Int2
s_ParseWindowMaskerOptions(const char * winmask_options,char ** dbname,int * taxid)130 s_ParseWindowMaskerOptions(const char * winmask_options,
131 char ** dbname,
132 int * taxid)
133 {
134 char* ptr = NULL;
135
136 ASSERT(dbname);
137 *dbname = NULL;
138
139 if (!winmask_options)
140 return 0;
141
142 ptr = strstr(winmask_options, "-d");
143
144 if (ptr) {
145 char * endp = 0;
146
147 ptr += 2;
148 while (*ptr == ' ' || *ptr == '\t')
149 ++ptr;
150
151 *dbname = strdup(ptr);
152
153 for(endp = *dbname; *endp; ++endp) {
154 if (*endp == ' ' || *endp == '\t') {
155 *endp = (char)0;
156 break;
157 }
158 }
159 } else {
160 ptr = strstr(winmask_options, "-t");
161
162 if (ptr) {
163 ptr += 2;
164 while (*ptr == ' ' || *ptr == '\t')
165 ++ptr;
166 *taxid = atoi(ptr);
167 }
168 }
169
170 return 0;
171 }
172
173 /** Parses options used for dust.
174 * @param ptr buffer containing instructions. [in]
175 * @param level sets level for dust. [out]
176 * @param window sets window for dust [out]
177 * @param linker sets linker for dust. [out]
178 */
179 static Int2
s_ParseDustOptions(const char * ptr,int * level,int * window,int * linker)180 s_ParseDustOptions(const char *ptr, int* level, int* window, int* linker)
181
182 {
183 char buffer[BLASTOPTIONS_BUFFER_SIZE];
184 int arg, index, index1, window_pri=-1, linker_pri=-1, level_pri=-1;
185
186 arg = 0;
187 index1 = 0;
188 for (index=0; index<BLASTOPTIONS_BUFFER_SIZE; index++)
189 {
190 if (*ptr == ' ' || *ptr == NULLB)
191 {
192 long tmplong;
193 buffer[index1] = NULLB;
194 index1 = 0;
195 switch(arg) {
196 case 0:
197 sscanf(buffer, "%ld", &tmplong);
198 level_pri = tmplong;
199 break;
200 case 1:
201 sscanf(buffer, "%ld", &tmplong);
202 window_pri = tmplong;
203 break;
204 case 2:
205 sscanf(buffer, "%ld", &tmplong);
206 linker_pri = tmplong;
207 break;
208 default:
209 break;
210 }
211
212 arg++;
213 while (*ptr == ' ')
214 ptr++;
215
216 /* end of the buffer. */
217 if (*ptr == NULLB)
218 break;
219 }
220 else
221 {
222 buffer[index1] = *ptr; ptr++;
223 index1++;
224 }
225 }
226 if (arg != 0 && arg != 3)
227 return 1;
228
229 *level = level_pri;
230 *window = window_pri;
231 *linker = linker_pri;
232
233 return 0;
234 }
235
236 /** parses a string to set three seg options.
237 * @param ptr buffer containing instructions [in]
238 * @param window returns "window" for seg algorithm. [out]
239 * @param locut returns "locut" for seg. [out]
240 * @param hicut returns "hicut" for seg. [out]
241 */
242 static Int2
s_ParseSegOptions(const char * ptr,Int4 * window,double * locut,double * hicut)243 s_ParseSegOptions(const char *ptr, Int4* window, double* locut, double* hicut)
244
245 {
246 char buffer[BLASTOPTIONS_BUFFER_SIZE];
247 Int4 arg, index, index1;
248
249 arg = 0;
250 index1 = 0;
251 for (index=0; index<BLASTOPTIONS_BUFFER_SIZE; index++)
252 {
253 if (*ptr == ' ' || *ptr == NULLB)
254 {
255 long tmplong;
256 double tmpdouble;
257 buffer[index1] = NULLB;
258 index1 = 0;
259 switch(arg) {
260 case 0:
261 sscanf(buffer, "%ld", &tmplong);
262 *window = tmplong;
263 break;
264 case 1:
265 sscanf(buffer, "%le", &tmpdouble);
266 *locut = tmpdouble;
267 break;
268 case 2:
269 sscanf(buffer, "%le", &tmpdouble);
270 *hicut = tmpdouble;
271 break;
272 default:
273 break;
274 }
275
276 arg++;
277 while (*ptr == ' ')
278 ptr++;
279
280 /* end of the buffer. */
281 if (*ptr == NULLB)
282 break;
283 }
284 else
285 {
286 buffer[index1] = *ptr; ptr++;
287 index1++;
288 }
289 }
290 if (arg != 0 && arg != 3)
291 return 1;
292
293 return 0;
294 }
295
296 /// Wrapper around strcat to ensure we don't do buffer overflows :)
297 /// @param dest string to concatenate to [in|out]
298 /// @param dest_size size of the dest array, modified if dest is grown [in|out]
299 /// @param string2append string to append to dest [in]
300 /// @return the concatenated string or NULL if we run out of memory
301 static char*
s_SafeStrCat(char ** dest,unsigned int * dest_size,const char * string2append)302 s_SafeStrCat(char** dest, unsigned int* dest_size, const char* string2append)
303 {
304 size_t dest_length = strlen(*dest);
305 size_t string2append_length = strlen(string2append);
306 if ((dest_length + string2append_length + 1) > *dest_size) {
307 size_t target_size = MAX(string2append_length, dest_length) * 2;
308 *dest = (char*)realloc((void*)*dest, target_size);
309 if (*dest) {
310 (*dest_size) = target_size;
311 } else {
312 sfree(*dest);
313 return 0;
314 }
315 }
316 strcat(*dest, string2append);
317 return *dest;
318 }
319
320 char*
BlastFilteringOptionsToString(const SBlastFilterOptions * filtering_options)321 BlastFilteringOptionsToString(const SBlastFilterOptions* filtering_options)
322 {
323 char* retval = NULL;
324 unsigned int retval_size = 0;
325
326 if (filtering_options == NULL) {
327 return strdup("F");
328 }
329
330 retval_size = 64; /* Usually this will suffice */
331 retval = (char*) calloc(retval_size, sizeof(char));
332
333 if (filtering_options->dustOptions) {
334 if (filtering_options->dustOptions->level == kDustLevel &&
335 filtering_options->dustOptions->window == kDustWindow &&
336 filtering_options->dustOptions->linker == kDustLinker) {
337 if (!s_SafeStrCat(&retval, &retval_size, "L;")) {
338 return 0;
339 }
340 } else {
341 char buffer[24] = { '\0' };
342 snprintf(buffer, sizeof(buffer), "D %d %d %d;",
343 filtering_options->dustOptions->level,
344 filtering_options->dustOptions->window,
345 filtering_options->dustOptions->linker);
346 if (!s_SafeStrCat(&retval, &retval_size, buffer)) {
347 return 0;
348 }
349 }
350 }
351
352 if (filtering_options->segOptions) {
353 if (filtering_options->segOptions->window == kSegWindow &&
354 filtering_options->segOptions->locut == kSegLocut &&
355 filtering_options->segOptions->hicut == kSegHicut) {
356 if (!s_SafeStrCat(&retval, &retval_size, "L;")) {
357 return 0;
358 }
359 } else {
360 char buffer[24] = { '\0' };
361 snprintf(buffer, sizeof(buffer), "S %d %1.1f %1.1f;",
362 filtering_options->segOptions->window,
363 filtering_options->segOptions->locut,
364 filtering_options->segOptions->hicut);
365 if (!s_SafeStrCat(&retval, &retval_size, buffer)) {
366 return 0;
367 }
368 }
369 }
370
371 if (filtering_options->repeatFilterOptions) {
372 if (filtering_options->repeatFilterOptions->database) {
373 if (!s_SafeStrCat(&retval, &retval_size, "R -d ")) {
374 return 0;
375 }
376 if (!s_SafeStrCat(&retval, &retval_size,
377 filtering_options->repeatFilterOptions->database)) {
378 return 0;
379 }
380 if (!s_SafeStrCat(&retval, &retval_size, ";")) {
381 return 0;
382 }
383 } else {
384 if (!s_SafeStrCat(&retval, &retval_size, "R;")) {
385 return 0;
386 }
387 }
388 }
389
390 if (filtering_options->windowMaskerOptions) {
391 if (filtering_options->windowMaskerOptions->taxid != 0) {
392 char buffer[24] = { '\0' };
393 snprintf(buffer, sizeof(buffer), "W -t %d;",
394 filtering_options->windowMaskerOptions->taxid);
395 if (!s_SafeStrCat(&retval, &retval_size, buffer)) {
396 return 0;
397 }
398 } else if (filtering_options->windowMaskerOptions->database) {
399 if (!s_SafeStrCat(&retval, &retval_size, "W -d ")) {
400 return 0;
401 }
402 if (!s_SafeStrCat(&retval, &retval_size,
403 filtering_options->windowMaskerOptions->database)) {
404 return 0;
405 }
406 if (!s_SafeStrCat(&retval, &retval_size, ";")) {
407 return 0;
408 }
409 }
410 }
411
412 /* Mask at hash is a modifier for other filtering options, as such it
413 * doesn't make sense to apply it by itself */
414 if (SBlastFilterOptionsMaskAtHash(filtering_options)) {
415 if (strlen(retval) != 0) {
416 /* Add mask at hash as a modifier for other filtering options */
417 if (!s_SafeStrCat(&retval, &retval_size, "m;")) {
418 return 0;
419 }
420 } else {
421 /* We still need to set "m" in a filter string (WB-391, WB-394) */
422 /* The string below can be modified into "mF" or "mL" or
423 whatever is decided to be the conventional meaning */
424 if (!s_SafeStrCat(&retval, &retval_size, "m;")) {
425 return 0;
426 }
427 }
428 }
429
430 return strlen(retval) == 0
431 ? s_SafeStrCat(&retval, &retval_size, "F")
432 : retval;
433 }
434
435 Int2
BlastFilteringOptionsFromString(EBlastProgramType program_number,const char * instructions,SBlastFilterOptions ** filtering_options,Blast_Message ** blast_message)436 BlastFilteringOptionsFromString(EBlastProgramType program_number,
437 const char* instructions,
438 SBlastFilterOptions* *filtering_options,
439 Blast_Message* *blast_message)
440 {
441 Boolean mask_at_hash = FALSE; /* the default. */
442 char* buffer;
443 const char* ptr = instructions;
444 char error_buffer[1024];
445 Int2 status = 0;
446 SSegOptions* segOptions = NULL;
447 SDustOptions* dustOptions = NULL;
448 SRepeatFilterOptions* repeatOptions = NULL;
449 SWindowMaskerOptions * winmaskOptions = NULL;
450
451 *filtering_options = NULL;
452 if (blast_message)
453 *blast_message = NULL;
454
455 if (instructions == NULL || strcasecmp(instructions, "F") == 0)
456 {
457 SBlastFilterOptionsNew(filtering_options, eEmpty);
458 return status;
459 }
460
461 buffer = (char*) calloc(strlen(instructions), sizeof(char));
462 /* allow old-style filters when m cannot be followed by the ';' */
463 if (ptr[0] == 'm' && ptr[1] == ' ')
464 {
465 mask_at_hash = TRUE;
466 ptr += 2;
467 }
468
469 while (*ptr != NULLB)
470 {
471 if (*ptr == 'S')
472 {
473 SSegOptionsNew(&segOptions);
474 ptr = s_LoadOptionsToBuffer(ptr+1, buffer);
475 if (buffer[0] != NULLB)
476 {
477 int window = 0;
478 double locut = .0, hicut = .0;
479 status = s_ParseSegOptions(buffer, &window, &locut, &hicut);
480 if (status)
481 {
482 segOptions = SSegOptionsFree(segOptions);
483 sprintf(error_buffer, "Error parsing filter string: %s", buffer);
484 if (blast_message)
485 Blast_MessageWrite(blast_message, eBlastSevError, kBlastMessageNoContext,
486 error_buffer);
487 sfree(buffer);
488 return status;
489 }
490 segOptions->window = window;
491 segOptions->locut = locut;
492 segOptions->hicut = hicut;
493 }
494 }
495 else if (*ptr == 'D')
496 {
497 SDustOptionsNew(&dustOptions);
498 ptr = s_LoadOptionsToBuffer(ptr+1, buffer);
499 if (buffer[0] != NULLB)
500 {
501 int window = 0, level = 0, linker = 0;
502 status = s_ParseDustOptions(buffer, &level, &window, &linker);
503 if (status)
504 {
505 dustOptions = SDustOptionsFree(dustOptions);
506 sprintf(error_buffer, "Error parsing filter string: %s", buffer);
507 if (blast_message)
508 Blast_MessageWrite(blast_message, eBlastSevError, kBlastMessageNoContext,
509 error_buffer);
510 sfree(buffer);
511 return status;
512 }
513 dustOptions->level = level;
514 dustOptions->window = window;
515 dustOptions->linker = linker;
516 }
517 }
518 else if (*ptr == 'R')
519 {
520 SRepeatFilterOptionsNew(&repeatOptions);
521 ptr = s_LoadOptionsToBuffer(ptr+1, buffer);
522 if (buffer[0] != NULLB)
523 {
524 char* dbname = NULL;
525 status = s_ParseRepeatOptions(buffer, &dbname);
526 if (status)
527 {
528 repeatOptions = SRepeatFilterOptionsFree(repeatOptions);
529 sprintf(error_buffer, "Error parsing filter string: %s", buffer);
530 if (blast_message)
531 Blast_MessageWrite(blast_message, eBlastSevError, kBlastMessageNoContext,
532 error_buffer);
533 sfree(buffer);
534 return status;
535 }
536 if (dbname)
537 {
538 sfree(repeatOptions->database);
539 repeatOptions->database = dbname;
540 }
541 }
542 }
543 else if (*ptr == 'W')
544 {
545 SWindowMaskerOptionsNew(&winmaskOptions);
546
547 ptr = s_LoadOptionsToBuffer(ptr+1, buffer);
548 if (buffer[0] != NULLB) {
549 char* dbname = NULL;
550 int taxid = 0;
551
552 status = s_ParseWindowMaskerOptions(buffer, &dbname, &taxid);
553 if (status) {
554 winmaskOptions = SWindowMaskerOptionsFree(winmaskOptions);
555 sprintf(error_buffer, "Error parsing filter string: %s", buffer);
556 if (blast_message)
557 Blast_MessageWrite(blast_message, eBlastSevError, kBlastMessageNoContext,
558 error_buffer);
559
560 sfree(buffer);
561 return status;
562 }
563 if (dbname) {
564 sfree(winmaskOptions->database);
565 winmaskOptions->database = dbname;
566 }
567 if (taxid) {
568 winmaskOptions->taxid = taxid;
569 }
570 }
571 }
572 else if (*ptr == 'L' || *ptr == 'T')
573 { /* do low-complexity filtering; dust for blastn, otherwise seg.*/
574 if (program_number == eBlastTypeBlastn
575 || program_number == eBlastTypeMapping)
576
577 SDustOptionsNew(&dustOptions);
578 else
579 SSegOptionsNew(&segOptions);
580 ptr++;
581 }
582 else if (*ptr == 'm')
583 {
584 mask_at_hash = TRUE;
585 ptr++;
586 }
587 else
588 { /* Nothing applied */
589 ptr++;
590 }
591 }
592 sfree(buffer);
593
594 status = SBlastFilterOptionsNew(filtering_options, eEmpty);
595 if (status)
596 return status;
597
598 (*filtering_options)->dustOptions = dustOptions;
599 (*filtering_options)->segOptions = segOptions;
600 (*filtering_options)->repeatFilterOptions = repeatOptions;
601 (*filtering_options)->windowMaskerOptions = winmaskOptions;
602 (*filtering_options)->mask_at_hash = mask_at_hash;
603
604 return status;
605 }
606
607
BlastSeqLocNew(BlastSeqLoc ** head,Int4 from,Int4 to)608 BlastSeqLoc* BlastSeqLocNew(BlastSeqLoc** head, Int4 from, Int4 to)
609 {
610 BlastSeqLoc* loc = (BlastSeqLoc*) calloc(1, sizeof(BlastSeqLoc));
611 if ( !loc ) {
612 return NULL;
613 }
614 loc->ssr = (SSeqRange*) calloc(1, sizeof(SSeqRange));
615 loc->ssr->left = from;
616 loc->ssr->right = to;
617
618 return BlastSeqLocAppend(head, loc);
619 }
620
BlastSeqLocAppend(BlastSeqLoc ** head,BlastSeqLoc * node)621 BlastSeqLoc* BlastSeqLocAppend(BlastSeqLoc** head, BlastSeqLoc* node)
622 {
623 if ( !node ) {
624 return NULL;
625 }
626
627 if (head)
628 {
629 if (*head)
630 {
631 BlastSeqLoc* tmp = *head;
632 while (tmp->next)
633 tmp = tmp->next;
634 tmp->next = node;
635 }
636 else
637 {
638 *head = node;
639 }
640 }
641
642 return node;
643 }
644
645 /** Makes a copy of the BlastSeqLoc and also a copy of the
646 * SSRange element. Does not copy BlastSeqLoc that is pointed
647 * to by "next".
648 * @param source the object to be copied [in]
649 * @return another BlastSeqLoc*
650 */
s_BlastSeqLocNodeDup(BlastSeqLoc * source)651 static BlastSeqLoc* s_BlastSeqLocNodeDup(BlastSeqLoc* source)
652 {
653 if ( !source ) {
654 return NULL;
655 }
656 ASSERT(source->ssr);
657 return BlastSeqLocNew(NULL, source->ssr->left, source->ssr->right);
658 }
659
660 /** Calculates number of links in a chain of BlastSeqLoc's.
661 * @param var Chain of BlastSeqLoc structures [in]
662 * @return Number of links in the chain.
663 */
s_BlastSeqLocLen(const BlastSeqLoc * var)664 static Int4 s_BlastSeqLocLen(const BlastSeqLoc* var)
665 {
666 BlastSeqLoc* itr = NULL;
667 Int4 retval = 0;
668
669 for (itr = (BlastSeqLoc*)var; itr; itr = itr->next, retval++) {
670 ;
671 }
672 return retval;
673 }
674
675 /** Converts a BlastSeqLoc list to an array of pointers, each pointing to an
676 * element of the list passed in to this function and the last element points
677 * to NULL
678 * @param list List to convert to an array of pointers [in]
679 * @param count number of elements populated in the array [out]
680 */
681 static BlastSeqLoc**
s_BlastSeqLocListToArrayOfPointers(const BlastSeqLoc * list,Int4 * count)682 s_BlastSeqLocListToArrayOfPointers(const BlastSeqLoc* list, Int4* count)
683 {
684 BlastSeqLoc* tmp,** retval;
685 Int4 i;
686 *count = 0;
687
688 if (list == NULL)
689 return NULL;
690
691 *count = s_BlastSeqLocLen(list);
692 retval = (BlastSeqLoc**) calloc(((size_t)(*count)+1), sizeof(BlastSeqLoc*));
693 for (tmp = (BlastSeqLoc*)list, i = 0; tmp != NULL && i < *count; i++) {
694 retval[i] = tmp;
695 tmp = tmp->next;
696 }
697 return retval;
698 }
699
700 /** Reverse elements in the list
701 * @param head pointer to pointer to the head of the list. [in|out]
702 * (this is not declared static so that it can be tested in the unit tests
703 */
704 NCBI_XBLAST_EXPORT
BlastSeqLocListReverse(BlastSeqLoc ** head)705 void BlastSeqLocListReverse(BlastSeqLoc** head)
706 {
707 BlastSeqLoc** ptrs = NULL; /* array of pointers to BlastSeqLoc elements */
708 Int4 num_elems = 0, i = 0;
709
710 if ( !head ) {
711 return;
712 }
713
714 ptrs = s_BlastSeqLocListToArrayOfPointers(*head, &num_elems);
715 if (num_elems == 0) {
716 return;
717 }
718 ASSERT(ptrs);
719 *head = ptrs[num_elems-1];
720 for (i = num_elems-1; i > 0; i--) {
721 ptrs[i]->next = ptrs[i-1];
722 }
723 ptrs[0]->next = NULL;
724 sfree(ptrs);
725 }
726
BlastSeqLocNodeFree(BlastSeqLoc * loc)727 BlastSeqLoc* BlastSeqLocNodeFree(BlastSeqLoc* loc)
728 {
729 if ( !loc ) {
730 return NULL;
731 }
732 sfree(loc->ssr);
733 sfree(loc);
734 return NULL;
735 }
736
BlastSeqLocFree(BlastSeqLoc * loc)737 BlastSeqLoc* BlastSeqLocFree(BlastSeqLoc* loc)
738 {
739 while (loc) {
740 BlastSeqLoc* next_loc = loc->next;
741 loc = BlastSeqLocNodeFree(loc);
742 loc = next_loc;
743 }
744 return NULL;
745 }
746
BlastSeqLocListDup(BlastSeqLoc * head)747 BlastSeqLoc* BlastSeqLocListDup(BlastSeqLoc* head)
748 {
749 BlastSeqLoc* retval = NULL;
750 BlastSeqLoc* retval_tail = NULL;
751
752 for (; head; head = head->next) {
753 retval_tail = BlastSeqLocAppend(retval_tail ? &retval_tail : &retval,
754 s_BlastSeqLocNodeDup(head));
755 }
756
757 return retval;
758 }
759
BlastMaskLocNew(Int4 total)760 BlastMaskLoc* BlastMaskLocNew(Int4 total)
761 {
762 BlastMaskLoc* retval = (BlastMaskLoc *) calloc(1, sizeof(BlastMaskLoc));
763 retval->total_size = total;
764 if (total > 0)
765 retval->seqloc_array = (BlastSeqLoc **) calloc(total,
766 sizeof(BlastSeqLoc *));
767 return retval;
768 }
769
BlastMaskLocDup(const BlastMaskLoc * mask_loc)770 BlastMaskLoc* BlastMaskLocDup(const BlastMaskLoc* mask_loc)
771 {
772 BlastMaskLoc* retval = NULL;
773 Int4 index = 0;
774
775 if ( !mask_loc ) {
776 return NULL;
777 }
778
779 retval = BlastMaskLocNew(mask_loc->total_size);
780
781 for (index = 0; index < mask_loc->total_size; index++) {
782 retval->seqloc_array[index] =
783 BlastSeqLocListDup(mask_loc->seqloc_array[index]);
784 }
785
786 return retval;
787 }
788
BlastMaskLocFree(BlastMaskLoc * mask_loc)789 BlastMaskLoc* BlastMaskLocFree(BlastMaskLoc* mask_loc)
790 {
791 Int4 index;
792
793 if (mask_loc == NULL)
794 return NULL;
795
796 for (index=0; index<mask_loc->total_size; index++)
797 {
798 if (mask_loc->seqloc_array != NULL)
799 BlastSeqLocFree(mask_loc->seqloc_array[index]);
800 }
801 sfree(mask_loc->seqloc_array);
802 sfree(mask_loc);
803 return NULL;
804 }
805
BlastMaskLocDNAToProtein(BlastMaskLoc * mask_loc,const BlastQueryInfo * query_info)806 Int2 BlastMaskLocDNAToProtein(BlastMaskLoc* mask_loc,
807 const BlastQueryInfo* query_info)
808 {
809 Uint4 seq_index;
810 BlastSeqLoc* dna_seqlocs[NUM_FRAMES];
811
812 if (!mask_loc)
813 return 0;
814
815 /* Check that the array size in BlastMaskLoc corresponds to the number
816 of contexts in BlastQueryInfo. */
817 ASSERT(mask_loc->total_size == query_info->last_context + 1);
818
819 /* Loop over multiple DNA sequences */
820 for (seq_index = 0; seq_index < (Uint4)query_info->num_queries;
821 ++seq_index) {
822 const Uint4 ctx_idx = NUM_FRAMES * seq_index;
823 const Int4 dna_length = BlastQueryInfoGetQueryLength(query_info,
824 eBlastTypeBlastx,
825 seq_index);
826 Int4 context;
827
828 /* Save the DNA masking locations, as they'll be freed and overwritten
829 * by their translations */
830 memset((void*) &dna_seqlocs, 0, sizeof(dna_seqlocs));
831 memcpy((void*) &dna_seqlocs,
832 (void*) &mask_loc->seqloc_array[ctx_idx],
833 sizeof(dna_seqlocs));
834 memset((void*) &mask_loc->seqloc_array[ctx_idx], 0, sizeof(dna_seqlocs));
835
836 /* Reproduce this mask for all 6 frames, with translated coordinates */
837 for (context = 0; context < NUM_FRAMES; ++context) {
838 const Int2 frame = BLAST_ContextToFrame(eBlastTypeBlastx, context);
839 BlastSeqLoc* frame_seqloc = dna_seqlocs[context];
840 BlastSeqLoc* prot_tail = NULL;
841 BlastSeqLoc* itr = NULL;
842
843 /* If no masks were provided for some frames, use the first one */
844 if (frame_seqloc == NULL && dna_seqlocs[0]) {
845 frame_seqloc = dna_seqlocs[0];
846 }
847 for (itr = frame_seqloc; itr; itr = itr->next) {
848 Int4 from, to;
849 SSeqRange* seq_range = itr->ssr;
850 /* masks should be 0-offset */
851 ASSERT(seq_range->right < dna_length);
852 ASSERT(seq_range->left >= 0);
853 if (frame < 0) {
854 from = (dna_length + frame - seq_range->right)/CODON_LENGTH;
855 to = (dna_length + frame - seq_range->left)/CODON_LENGTH;
856 } else {
857 from = (seq_range->left - frame + 1)/CODON_LENGTH;
858 to = (seq_range->right - frame + 1)/CODON_LENGTH;
859 }
860
861 if (from < 0)
862 from = 0;
863 if (to < 0)
864 to = 0;
865 if (from >= query_info->contexts[ctx_idx+context].query_length)
866 from = query_info->contexts[ctx_idx+context].query_length - 1;
867 if (to >= query_info->contexts[ctx_idx+context].query_length)
868 to = query_info->contexts[ctx_idx+context].query_length - 1;
869
870 ASSERT(from >= 0);
871 ASSERT(to >= 0);
872 ASSERT(from < query_info->contexts[ctx_idx+context].query_length);
873 ASSERT(to < query_info->contexts[ctx_idx+context].query_length);
874
875 /* Cache the tail of the list to avoid the overhead of
876 * traversing the list when appending to it */
877 prot_tail = BlastSeqLocNew((prot_tail
878 ? & prot_tail
879 : & mask_loc->seqloc_array[ctx_idx+context]),
880 from, to);
881 }
882 }
883 for (context = 0; context < NUM_FRAMES; ++context) {
884 BlastSeqLocFree(dna_seqlocs[context]);
885 }
886 }
887
888 return 0;
889 }
890
891
BlastMaskLocProteinToDNA(BlastMaskLoc * mask_loc,const BlastQueryInfo * query_info)892 Int2 BlastMaskLocProteinToDNA(BlastMaskLoc* mask_loc,
893 const BlastQueryInfo* query_info)
894 {
895 Int2 status = 0;
896 Int4 index;
897
898 /* If there is not mask, there is nothing to convert to DNA coordinates,
899 hence just return. */
900 if (!mask_loc)
901 return 0;
902
903 /* Check that the array size in BlastMaskLoc corresponds to the number
904 of contexts in BlastQueryInfo. */
905 ASSERT(mask_loc->total_size == query_info->last_context + 1);
906
907 /* Loop over all DNA sequences */
908 for (index=0; index < query_info->num_queries; ++index)
909 {
910 Int4 frame_start = index*NUM_FRAMES;
911 Int4 frame_index;
912 Int4 dna_length = BlastQueryInfoGetQueryLength(query_info,
913 eBlastTypeBlastx,
914 index);
915 /* Loop over all frames of one DNA sequence */
916 for (frame_index=frame_start; frame_index<(frame_start+NUM_FRAMES);
917 frame_index++) {
918 BlastSeqLoc* loc;
919 Int2 frame =
920 BLAST_ContextToFrame(eBlastTypeBlastx, frame_index % NUM_FRAMES);
921 /* Loop over all mask locations for a given frame */
922 for (loc = mask_loc->seqloc_array[frame_index]; loc; loc = loc->next) {
923 Int4 from=0, to=0;
924 SSeqRange* seq_range = loc->ssr;
925 if (frame < 0) {
926 to = dna_length - CODON_LENGTH*seq_range->left + frame;
927 from = dna_length - CODON_LENGTH*seq_range->right + frame + 1;
928 } else {
929 from = CODON_LENGTH*seq_range->left + frame - 1;
930 to = CODON_LENGTH*seq_range->right + frame - 1;
931 }
932
933 if (from < 0)
934 from = 0;
935 if (to < 0)
936 to = 0;
937 if (from >= dna_length)
938 from = dna_length - 1;
939 if (to >= dna_length)
940 to = dna_length - 1;
941
942 ASSERT(from >= 0);
943 ASSERT(to >= 0);
944 ASSERT(from < dna_length);
945 ASSERT(to < dna_length);
946
947 seq_range->left = from;
948 seq_range->right = to;
949 }
950 }
951 }
952 return status;
953 }
954
955 /** Used for qsort, compares two SeqLoc's by starting position. */
s_SeqRangeSortByStartPosition(const void * vp1,const void * vp2)956 static int s_SeqRangeSortByStartPosition(const void *vp1, const void *vp2)
957 {
958 BlastSeqLoc* v1 = *((BlastSeqLoc**) vp1);
959 BlastSeqLoc* v2 = *((BlastSeqLoc**) vp2);
960 SSeqRange* loc1 = (SSeqRange*) v1->ssr;
961 SSeqRange* loc2 = (SSeqRange*) v2->ssr;
962
963 if (loc1->left < loc2->left)
964 return -1;
965 else if (loc1->left > loc2->left)
966 return 1;
967 else
968 return 0;
969 }
970
971 void
BlastSeqLocCombine(BlastSeqLoc ** mask_loc,Int4 link_value)972 BlastSeqLocCombine(BlastSeqLoc** mask_loc, Int4 link_value)
973 {
974 BlastSeqLoc** ptrs = NULL;
975 Int4 i = 0, num_elems = 0;
976
977 /* Break up the list into an array of pointers and sort it */
978 ptrs = s_BlastSeqLocListToArrayOfPointers(*mask_loc, &num_elems);
979 if (num_elems == 0) {
980 return;
981 }
982 ASSERT(ptrs);
983 qsort(ptrs, (size_t)num_elems, sizeof(*ptrs),
984 s_SeqRangeSortByStartPosition);
985
986 /* Merge the overlapping elements */
987 {
988 BlastSeqLoc* curr_tail = *mask_loc = ptrs[0];
989 for (i = 0; i < num_elems - 1; i++) {
990 const SSeqRange* next_ssr = ptrs[i+1]->ssr;
991 const Int4 stop = curr_tail->ssr->right;
992
993 if ((stop + link_value) > next_ssr->left) {
994 curr_tail->ssr->right = MAX(stop, next_ssr->right);
995 ptrs[i+1] = BlastSeqLocNodeFree(ptrs[i+1]);
996 } else {
997 curr_tail = ptrs[i+1];
998 }
999 }
1000 }
1001
1002 /* Rebuild the linked list */
1003 {
1004 BlastSeqLoc* tail = *mask_loc;
1005 for (i = 1; i < num_elems; i++) {
1006 if (ptrs[i]) {
1007 tail->next = ptrs[i];
1008 tail = ptrs[i];
1009 }
1010 }
1011 tail->next = NULL;
1012 }
1013 sfree(ptrs);
1014 }
1015
1016 Int2
BLAST_ComplementMaskLocations(EBlastProgramType program_number,const BlastQueryInfo * query_info,const BlastMaskLoc * mask_loc,BlastSeqLoc ** complement_mask)1017 BLAST_ComplementMaskLocations(EBlastProgramType program_number,
1018 const BlastQueryInfo* query_info,
1019 const BlastMaskLoc* mask_loc, BlastSeqLoc* *complement_mask)
1020 {
1021 Int4 context;
1022 const Boolean kIsNucl = (program_number == eBlastTypeBlastn ||
1023 program_number == eBlastTypeMapping);
1024 BlastSeqLoc* tail = NULL; /* Pointer to the tail of the complement_mask
1025 linked list */
1026
1027 if (complement_mask == NULL)
1028 return -1;
1029
1030 *complement_mask = NULL;
1031
1032 for (context = query_info->first_context;
1033 context <= query_info->last_context; ++context) {
1034
1035 Boolean first = TRUE; /* Specifies beginning of query. */
1036 Boolean last_interval_open=TRUE; /* if TRUE last interval needs to be closed. */
1037 Int4 start_offset, end_offset, filter_start, filter_end;
1038 Int4 left=0, right; /* Used for left/right extent of a region. */
1039 BlastSeqLoc* loc = NULL;
1040
1041 if (query_info->contexts[context].is_valid == FALSE) {
1042 continue;
1043 }
1044
1045 start_offset = query_info->contexts[context].query_offset;
1046 end_offset = query_info->contexts[context].query_length
1047 + start_offset - 1;
1048 ASSERT(start_offset <= end_offset);
1049
1050 /* mask_loc NULL is simply the case that NULL was passed in, which we
1051 take to mean that nothing on query is masked. */
1052 if (mask_loc == NULL || mask_loc->seqloc_array[context] == NULL) {
1053 /* Cache the tail of the list to avoid the overhead of traversing the
1054 * list when appending to it */
1055 tail = BlastSeqLocNew(tail ? &tail : complement_mask,
1056 start_offset, end_offset);
1057 continue;
1058 }
1059
1060 if (BlastIsReverseStrand(kIsNucl, context)) {
1061 BlastSeqLocListReverse(&mask_loc->seqloc_array[context]);
1062 }
1063 loc = mask_loc->seqloc_array[context];
1064
1065 first = TRUE;
1066 for ( ; loc; loc = loc->next) {
1067 SSeqRange* seq_range = loc->ssr;
1068 if (BlastIsReverseStrand(kIsNucl, context)) {
1069 filter_start = end_offset - seq_range->right;
1070 filter_end = end_offset - seq_range->left;
1071 } else {
1072 filter_start = start_offset + seq_range->left;
1073 filter_end = start_offset + seq_range->right;
1074 }
1075 /* The canonical "state" at the top of this
1076 while loop is that both "left" and "right" have
1077 been initialized to their correct values.
1078 The first time this loop is entered in a call to
1079 the function this is not true and the following "if"
1080 statement moves everything to the canonical state. */
1081 if (first) {
1082 last_interval_open = TRUE;
1083 first = FALSE;
1084
1085 if (filter_start > start_offset) {
1086 /* beginning of sequence not filtered */
1087 left = start_offset;
1088 } else {
1089 /* beginning of sequence filtered */
1090 left = filter_end + 1;
1091 continue;
1092 }
1093 }
1094
1095 right = filter_start - 1;
1096
1097 /* Cache the tail of the list to avoid the overhead of traversing the
1098 * list when appending to it */
1099 tail = BlastSeqLocNew((tail ? &tail : complement_mask), left, right);
1100 if (filter_end >= end_offset) {
1101 /* last masked region at end of sequence */
1102 last_interval_open = FALSE;
1103 break;
1104 } else {
1105 left = filter_end + 1;
1106 }
1107 }
1108
1109 if (last_interval_open) {
1110 /* Need to finish SSeqRange* for last interval. */
1111 right = end_offset;
1112 /* Cache the tail of the list to avoid the overhead of traversing the
1113 * list when appending to it */
1114 tail = BlastSeqLocNew((tail ? &tail : complement_mask), left, right);
1115 }
1116 }
1117 return 0;
1118 }
1119
1120
1121 Int2
BlastSetUp_Filter(EBlastProgramType program_number,Uint1 * sequence,Int4 length,Int4 offset,const SBlastFilterOptions * filter_options,BlastSeqLoc ** seqloc_retval,Blast_Message ** blast_message)1122 BlastSetUp_Filter(EBlastProgramType program_number,
1123 Uint1* sequence,
1124 Int4 length,
1125 Int4 offset,
1126 const SBlastFilterOptions* filter_options,
1127 BlastSeqLoc** seqloc_retval,
1128 Blast_Message* *blast_message)
1129 {
1130 Int2 status=0; /* return value. */
1131
1132 ASSERT(filter_options);
1133 ASSERT(seqloc_retval);
1134
1135 *seqloc_retval = NULL;
1136
1137 status = SBlastFilterOptionsValidate(program_number, filter_options,
1138 blast_message);
1139 if (status)
1140 return status;
1141
1142 if (filter_options->segOptions)
1143 {
1144 SSegOptions* seg_options = filter_options->segOptions;
1145 SegParameters* sparamsp=NULL;
1146
1147 sparamsp = SegParametersNewAa();
1148 sparamsp->overlaps = TRUE;
1149 if (seg_options->window > 0)
1150 sparamsp->window = seg_options->window;
1151 if (seg_options->locut > 0.0)
1152 sparamsp->locut = seg_options->locut;
1153 if (seg_options->hicut > 0.0)
1154 sparamsp->hicut = seg_options->hicut;
1155
1156 status = SeqBufferSeg(sequence, length, offset, sparamsp,
1157 seqloc_retval);
1158 SegParametersFree(sparamsp);
1159 sparamsp = NULL;
1160 }
1161
1162 if (filter_options->readQualityOptions) {
1163 status = FilterQueriesForMapping(sequence, length, offset,
1164 filter_options->readQualityOptions,
1165 seqloc_retval);
1166 }
1167
1168
1169 return status;
1170 }
1171
1172 void
BlastSeqLocReverse(BlastSeqLoc * masks,Int4 query_length)1173 BlastSeqLocReverse(BlastSeqLoc* masks, Int4 query_length)
1174 {
1175 for(; masks; masks = masks->next) {
1176 masks->ssr->left = query_length - 1 - masks->ssr->right;
1177 masks->ssr->right = query_length - 1 - masks->ssr->left;
1178 }
1179 }
1180
1181 /** Calculates the mask locations one context at a time.
1182 * @param query_blk sequence [in]
1183 * @param query_info information about sequences [in]
1184 * @param context which context is this? [in]
1185 * @param program_number program (blastn, blastp, etc.) [in]
1186 * @param filter_options instructions for producing mask [in]
1187 * @param filter_out results of filtering operations [out]
1188 * @param blast_message any error or warning messages [out]
1189 * @return zero on success
1190 */
1191 static Int2
s_GetFilteringLocationsForOneContext(BLAST_SequenceBlk * query_blk,const BlastQueryInfo * query_info,Int4 context,EBlastProgramType program_number,const SBlastFilterOptions * filter_options,BlastSeqLoc ** filter_out,Blast_Message ** blast_message)1192 s_GetFilteringLocationsForOneContext(BLAST_SequenceBlk* query_blk,
1193 const BlastQueryInfo* query_info,
1194 Int4 context,
1195 EBlastProgramType program_number,
1196 const SBlastFilterOptions* filter_options,
1197 BlastSeqLoc* *filter_out,
1198 Blast_Message* *blast_message)
1199 {
1200 Int2 status = 0;
1201 Int4 query_length = 0; /* Length of query described by SeqLocPtr. */
1202 Int4 context_offset;
1203 Uint1 *buffer; /* holds sequence for plus strand or protein. */
1204
1205 const Boolean kIsNucl = (program_number == eBlastTypeBlastn ||
1206 program_number == eBlastTypeMapping);
1207
1208 context_offset = query_info->contexts[context].query_offset;
1209 buffer = &query_blk->sequence[context_offset];
1210
1211 if (query_info->contexts[context].is_valid == FALSE) {
1212 return 0;
1213 }
1214
1215 query_length = query_info->contexts[context].query_length;
1216
1217 status = BlastSetUp_Filter(program_number,
1218 buffer,
1219 query_length,
1220 0,
1221 filter_options,
1222 filter_out,
1223 blast_message);
1224 if (status)
1225 return status;
1226
1227 if (BlastIsReverseStrand(kIsNucl, context) == TRUE) {
1228 /* Reverse this as it's on minus strand. */
1229 BlastSeqLocReverse(*filter_out, query_length);
1230 }
1231
1232 /* Extract the mask locations corresponding to this query
1233 (frame, strand), detach it from other masks.
1234 NB: for translated search the mask locations are expected in
1235 protein coordinates. The nucleotide locations must be converted
1236 to protein coordinates prior to the call to BLAST_MainSetUp.
1237 */
1238 {
1239 /* Auxiliary locations for lower-case masking or any other masking
1240 * which occurred outside of CORE BLAST */
1241 BlastSeqLoc *lcase_mask_slp = NULL;
1242 if (query_blk->lcase_mask && query_blk->lcase_mask->seqloc_array)
1243 {
1244 ASSERT(context < query_blk->lcase_mask->total_size);
1245 lcase_mask_slp = query_blk->lcase_mask->seqloc_array[context];
1246 /* Set location list to NULL, to allow safe memory deallocation,
1247 ownership transferred to filter_out below. */
1248 query_blk->lcase_mask->seqloc_array[context] = NULL;
1249 }
1250
1251 /* Attach the lower case mask locations to the filter locations and
1252 combine them */
1253 BlastSeqLocAppend(filter_out, lcase_mask_slp);
1254 }
1255
1256 BlastSeqLocCombine(filter_out, 0);
1257
1258 return 0;
1259 }
1260
1261 Int2
BlastSetUp_GetFilteringLocations(BLAST_SequenceBlk * query_blk,const BlastQueryInfo * query_info,EBlastProgramType program_number,const SBlastFilterOptions * filter_options,BlastMaskLoc ** filter_maskloc,Blast_Message ** blast_message)1262 BlastSetUp_GetFilteringLocations(BLAST_SequenceBlk* query_blk,
1263 const BlastQueryInfo* query_info,
1264 EBlastProgramType program_number,
1265 const SBlastFilterOptions* filter_options,
1266 BlastMaskLoc** filter_maskloc,
1267 Blast_Message** blast_message)
1268 {
1269 Int2 status = 0;
1270 Int4 context = 0; /* loop variable. */
1271 const int kNumContexts = query_info->last_context + 1;
1272
1273 ASSERT(query_info && query_blk && filter_maskloc);
1274
1275 ASSERT(blast_message);
1276 ASSERT(kNumContexts ==
1277 query_info->num_queries*BLAST_GetNumberOfContexts(program_number));
1278 *filter_maskloc = BlastMaskLocNew(kNumContexts);
1279
1280 for (context = query_info->first_context;
1281 context <= query_info->last_context; ++context) {
1282
1283 BlastSeqLoc *filter_per_context = NULL;
1284 status = s_GetFilteringLocationsForOneContext(query_blk,
1285 query_info,
1286 context,
1287 program_number,
1288 filter_options,
1289 &filter_per_context,
1290 blast_message);
1291 if (status) {
1292 Blast_MessageWrite(blast_message, eBlastSevError, context,
1293 "Failure at filtering");
1294 return status;
1295 }
1296
1297 /* NB: for translated searches filter locations are returned in
1298 protein coordinates, because the DNA lengths of sequences are
1299 not available here. The caller must take care of converting
1300 them back to nucleotide coordinates. */
1301 (*filter_maskloc)->seqloc_array[context] = filter_per_context;
1302 }
1303 return 0;
1304 }
1305
1306 void
Blast_MaskTheResidues(Uint1 * buffer,Int4 length,Boolean is_na,const BlastSeqLoc * mask_loc,Boolean reverse,Int4 offset)1307 Blast_MaskTheResidues(Uint1 * buffer, Int4 length, Boolean is_na,
1308 const BlastSeqLoc* mask_loc, Boolean reverse, Int4 offset)
1309 {
1310 const Uint1 kMaskingLetter = is_na ? kNuclMask : kProtMask;
1311 ASSERT(buffer);
1312 for (; mask_loc; mask_loc = mask_loc->next) {
1313
1314 Int4 index, start, stop;
1315
1316 if (reverse) {
1317 start = length - 1 - mask_loc->ssr->right;
1318 stop = length - 1 - mask_loc->ssr->left;
1319 } else {
1320 start = mask_loc->ssr->left;
1321 stop = mask_loc->ssr->right;
1322 }
1323
1324 start -= offset;
1325 stop -= offset;
1326
1327 ASSERT(start < length);
1328 ASSERT(stop <= length);
1329
1330 for (index = start; index <= stop; index++)
1331 buffer[index] = kMaskingLetter;
1332 }
1333 }
1334
1335 void
Blast_MaskUnsupportedAA(BLAST_SequenceBlk * seq,Uint1 min_invalid)1336 Blast_MaskUnsupportedAA(BLAST_SequenceBlk* seq, Uint1 min_invalid)
1337 {
1338 Uint1 *sequence = seq->sequence;
1339 Int4 length = seq->length;
1340 Int4 i;
1341
1342 for (i = 0; i < length; i++) {
1343 if (sequence[i] >= min_invalid) {
1344 sequence[i] = kProtMask;
1345 }
1346 }
1347 }
1348
1349 void
BlastSetUp_MaskQuery(BLAST_SequenceBlk * query_blk,const BlastQueryInfo * query_info,const BlastMaskLoc * filter_maskloc,EBlastProgramType program_number)1350 BlastSetUp_MaskQuery(BLAST_SequenceBlk* query_blk,
1351 const BlastQueryInfo* query_info,
1352 const BlastMaskLoc *filter_maskloc,
1353 EBlastProgramType program_number)
1354 {
1355 const Boolean kIsNucl = (program_number == eBlastTypeBlastn
1356 || program_number == eBlastTypeMapping);
1357 Int4 context; /* loop variable. */
1358 Int4 total_length;
1359 Boolean has_mask = FALSE; /* Check for whether filter_maskloc is empty. */
1360 Int4 index; /* loop variable. */
1361
1362 ASSERT(query_blk);
1363 ASSERT(query_info);
1364 ASSERT(filter_maskloc);
1365
1366
1367 for (index=0; index<filter_maskloc->total_size; index++)
1368 {
1369 if (filter_maskloc->seqloc_array[index])
1370 {
1371 has_mask = TRUE;
1372 break;
1373 }
1374 }
1375 if (has_mask == FALSE)
1376 return;
1377
1378
1379 total_length = query_info->contexts[query_info->last_context].query_offset
1380 + query_info->contexts[query_info->last_context].query_length + 2;
1381 query_blk->sequence_start_nomask = BlastMemDup(query_blk->sequence_start, total_length);
1382 query_blk->sequence_nomask = query_blk->sequence_start_nomask + 1;
1383 query_blk->nomask_allocated = TRUE;
1384
1385 for (context = query_info->first_context;
1386 context <= query_info->last_context; ++context) {
1387
1388 Int4 query_length = 0;
1389 Int4 context_offset = 0;
1390 Uint1 *buffer = NULL; /* holds sequence */
1391
1392 if (query_info->contexts[context].is_valid == FALSE) {
1393 continue;
1394 }
1395
1396 query_length = query_info->contexts[context].query_length;
1397
1398 context_offset = query_info->contexts[context].query_offset;
1399 buffer = &query_blk->sequence[context_offset];
1400 ASSERT(buffer);
1401
1402 Blast_MaskTheResidues(buffer, query_length, kIsNucl,
1403 filter_maskloc->seqloc_array[context],
1404 BlastIsReverseStrand(kIsNucl, context), 0);
1405 }
1406 }
1407