1 /* $Id: blast_options.c 567495 2018-07-19 13:19:39Z fongah2 $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 */
26
27 /** @file blast_options.c
28 * The structures and functions in blast_options.[ch] should be used to specify
29 * user preferences. The options structures should not be changed by the BLAST code
30 * but rather be read to determine user preferences. When possible these structures
31 * should be passed in as "const".
32 *
33 */
34
35 #include <algo/blast/core/blast_options.h>
36 #include <algo/blast/core/blast_filter.h>
37 #include <algo/blast/core/blast_stat.h>
38 #include <algo/blast/composition_adjustment/composition_constants.h>
39 #include <algo/blast/core/hspfilter_collector.h>
40 #include <algo/blast/core/hspfilter_besthit.h>
41 #include <algo/blast/core/blast_util.h>
42
/** PSSM scaling factor of 1.0 (the name suggests "no IMPALA-style scaling";
 *  the consumers are outside this file). */
const double kPSSM_NoImpalaScaling = 1.0;

/* Default DUST parameters.  Declared in blast_def.h as extern const and
 * used by SDustOptionsNew() as the initial field values. */
const int kDustLevel  = 20;   /**< default SDustOptions::level  */
const int kDustWindow = 64;   /**< default SDustOptions::window */
const int kDustLinker = 1;    /**< default SDustOptions::linker */
49
/** Releases a DUST options structure.
 * @param dust_options structure to release; NULL is accepted [in]
 * @return always NULL, so callers can reset their pointer in one statement
 */
SDustOptions* SDustOptionsFree(SDustOptions* dust_options)
{
    if (dust_options == NULL)
        return NULL;

    sfree(dust_options);
    return NULL;
}
56
SDustOptionsNew(SDustOptions ** dust_options)57 Int2 SDustOptionsNew(SDustOptions* *dust_options)
58 {
59 if (dust_options == NULL)
60 return 1;
61
62 *dust_options = (SDustOptions*) malloc(sizeof(SDustOptions));
63 (*dust_options)->level = kDustLevel;
64 (*dust_options)->window = kDustWindow;
65 (*dust_options)->linker = kDustLinker;
66
67 return 0;
68 }
69
/** Releases a SEG options structure.
 * @param seg_options structure to release; NULL is accepted [in]
 * @return always NULL
 */
SSegOptions* SSegOptionsFree(SSegOptions* seg_options)
{
    if (seg_options == NULL)
        return NULL;

    sfree(seg_options);
    return NULL;
}
76
SSegOptionsNew(SSegOptions ** seg_options)77 Int2 SSegOptionsNew(SSegOptions* *seg_options)
78 {
79 if (seg_options == NULL)
80 return 1;
81
82 *seg_options = (SSegOptions*) malloc(sizeof(SSegOptions));
83 (*seg_options)->window = kSegWindow;
84 (*seg_options)->locut = kSegLocut;
85 (*seg_options)->hicut = kSegHicut;
86
87 return 0;
88 }
89
SWindowMaskerOptionsNew(SWindowMaskerOptions ** winmask_options)90 Int2 SWindowMaskerOptionsNew(SWindowMaskerOptions ** winmask_options)
91 {
92 if (winmask_options) {
93 *winmask_options = (SWindowMaskerOptions*) calloc(1, sizeof(SWindowMaskerOptions));
94 if (*winmask_options == NULL)
95 return BLASTERR_MEMORY;
96
97 (*winmask_options)->taxid = 0;
98 (*winmask_options)->database = NULL;
99 return 0;
100 }
101 return 1;
102 }
103
/** Releases a window masker options structure together with its database
 * name.
 * @param winmask_options structure to release; NULL is accepted [in]
 * @return always NULL
 */
SWindowMaskerOptions* SWindowMaskerOptionsFree(SWindowMaskerOptions* winmask_options)
{
    if (winmask_options == NULL)
        return NULL;

    if (winmask_options->database != NULL)
        sfree(winmask_options->database);

    sfree(winmask_options);
    return NULL;
}
116
/** Releases a repeat filter options structure together with its database
 * name.
 * @param repeat_options structure to release; NULL is accepted [in]
 * @return always NULL
 */
SRepeatFilterOptions* SRepeatFilterOptionsFree(SRepeatFilterOptions* repeat_options)
{
    if (repeat_options == NULL)
        return NULL;

    sfree(repeat_options->database);
    sfree(repeat_options);
    return NULL;
}
126
SRepeatFilterOptionsNew(SRepeatFilterOptions ** repeat_options)127 Int2 SRepeatFilterOptionsNew(SRepeatFilterOptions* *repeat_options)
128 {
129
130 if (repeat_options == NULL)
131 return 1;
132
133 *repeat_options = (SRepeatFilterOptions*) calloc(1, sizeof(SRepeatFilterOptions));
134 if (*repeat_options == NULL)
135 return BLASTERR_MEMORY;
136
137 (*repeat_options)->database = strdup(kDefaultRepeatFilterDb);
138
139 return 0;
140 }
141
SRepeatFilterOptionsResetDB(SRepeatFilterOptions ** repeat_options,const char * db)142 Int2 SRepeatFilterOptionsResetDB(SRepeatFilterOptions* *repeat_options, const char* db)
143 {
144 Int2 status=0;
145
146 if (*repeat_options == NULL)
147 status = SRepeatFilterOptionsNew(repeat_options);
148
149 if (status)
150 return status;
151
152 sfree((*repeat_options)->database);
153 (*repeat_options)->database = strdup(db);
154
155 return status;
156 }
157
SWindowMaskerOptionsResetDB(SWindowMaskerOptions ** winmask_options,const char * db)158 Int2 SWindowMaskerOptionsResetDB(SWindowMaskerOptions ** winmask_options, const char* db)
159 {
160 Int2 status=0;
161
162 if (*winmask_options == NULL)
163 status = SWindowMaskerOptionsNew(winmask_options);
164
165 if (status)
166 return status;
167
168 sfree((*winmask_options)->database);
169
170 if (db) {
171 (*winmask_options)->database = strdup(db);
172 }
173
174 return status;
175 }
176
/** Releases a read quality options structure.
 * @param read_quality_options structure to release; NULL is accepted [in]
 * @return always NULL
 */
SReadQualityOptions* SReadQualityOptionsFree(
                                  SReadQualityOptions* read_quality_options)
{
    /* free(NULL) is a no-op, so no guard is required. */
    free(read_quality_options);
    return NULL;
}
186
SReadQualityOptionsNew(SReadQualityOptions ** read_quality_options)187 Int2 SReadQualityOptionsNew(SReadQualityOptions** read_quality_options)
188 {
189 if (!read_quality_options) {
190 return 1;
191 }
192
193 *read_quality_options = calloc(1, sizeof(SReadQualityOptions));
194 if (!*read_quality_options) {
195 return 1;
196 }
197
198 (*read_quality_options)->frac_ambig = 0.5;
199 (*read_quality_options)->entropy = 16;
200
201 return 0;
202 }
203
/** Releases a filter options structure and every sub-option structure it
 * holds (dust, SEG, repeat filter, window masker, read quality).
 * @param filter_options structure to release; NULL is accepted [in]
 * @return always NULL
 */
SBlastFilterOptions* SBlastFilterOptionsFree(SBlastFilterOptions* filter_options)
{
    if (filter_options == NULL)
        return NULL;

    /* Each sub-destructor returns NULL, which clears the member. */
    filter_options->dustOptions =
        SDustOptionsFree(filter_options->dustOptions);
    filter_options->segOptions =
        SSegOptionsFree(filter_options->segOptions);
    filter_options->repeatFilterOptions =
        SRepeatFilterOptionsFree(filter_options->repeatFilterOptions);
    filter_options->windowMaskerOptions =
        SWindowMaskerOptionsFree(filter_options->windowMaskerOptions);
    filter_options->readQualityOptions =
        SReadQualityOptionsFree(filter_options->readQualityOptions);

    sfree(filter_options);
    return NULL;
}
223
SBlastFilterOptionsNew(SBlastFilterOptions ** filter_options,EFilterOptions type)224 Int2 SBlastFilterOptionsNew(SBlastFilterOptions* *filter_options, EFilterOptions type)
225 {
226 Int2 status = 0;
227
228 if (filter_options)
229 {
230 *filter_options = (SBlastFilterOptions*) calloc(1, sizeof(SBlastFilterOptions));
231 (*filter_options)->mask_at_hash = FALSE;
232 if (type == eSeg)
233 SSegOptionsNew(&((*filter_options)->segOptions));
234 if (type == eDust || type == eDustRepeats)
235 SDustOptionsNew(&((*filter_options)->dustOptions));
236 if (type == eRepeats || type == eDustRepeats)
237 SRepeatFilterOptionsNew(&((*filter_options)->repeatFilterOptions));
238 }
239 else
240 status = 1;
241
242 return status;
243 }
244
245
246 /** Merges together two sets of dust options, choosing the most non-default one.
247 *
248 * @param opt1 first set to be merged [in]
249 * @param opt2 second set to be merged [in]
250 * @return the merged options.
251 */
s_MergeDustOptions(const SDustOptions * opt1,const SDustOptions * opt2)252 static SDustOptions* s_MergeDustOptions(const SDustOptions* opt1, const SDustOptions* opt2)
253 {
254 SDustOptions* retval = NULL;
255
256 if (!opt1 && !opt2)
257 return NULL;
258
259 SDustOptionsNew(&retval);
260
261 if (opt1 && !opt2)
262 {
263 retval->level = opt1->level;
264 retval->window = opt1->window;
265 retval->linker = opt1->linker;
266 }
267 else if (!opt1 && opt2)
268 {
269 retval->level = opt2->level;
270 retval->window = opt2->window;
271 retval->linker = opt2->linker;
272 }
273 else
274 {
275 retval->level = (opt1->level != kDustLevel) ? opt1->level : opt2->level;
276 retval->window = (opt1->window != kDustWindow) ? opt1->window : opt2->window;
277 retval->linker = (opt1->linker != kDustLinker) ? opt1->linker : opt2->linker;
278 }
279
280 return retval;
281 }
282
283
284 /** Merges together two sets of SEG options, choosing the most non-default one.
285 *
286 * @param opt1 first set to be merged [in]
287 * @param opt2 second set to be merged [in]
288 * @return the merged options.
289 */
s_MergeSegOptions(const SSegOptions * opt1,const SSegOptions * opt2)290 static SSegOptions* s_MergeSegOptions(const SSegOptions* opt1, const SSegOptions* opt2)
291 {
292 SSegOptions* retval = NULL;
293
294 if (!opt1 && !opt2)
295 return NULL;
296
297 SSegOptionsNew(&retval);
298
299 if (opt1 && !opt2)
300 {
301 retval->window = opt1->window;
302 retval->locut = opt1->locut;
303 retval->hicut = opt1->hicut;
304 }
305 else if (!opt1 && opt2)
306 {
307 retval->window = opt2->window;
308 retval->locut = opt2->locut;
309 retval->hicut = opt2->hicut;
310 }
311 else
312 {
313 retval->window = (opt1->window != kSegWindow) ? opt1->window : opt2->window;
314 retval->locut = (opt1->locut != kSegLocut) ? opt1->locut : opt2->locut;
315 retval->hicut = (opt1->hicut != kSegHicut) ? opt1->hicut : opt2->hicut;
316 }
317 return retval;
318 }
319
320 /** Merges together two sets of repeat filter options, choosing the most non-default one.
321 *
322 * @param opt1 first set to be merged [in]
323 * @param opt2 second set to be merged [in]
324 * @return the merged options.
325 */
s_MergeRepeatOptions(const SRepeatFilterOptions * opt1,const SRepeatFilterOptions * opt2)326 static SRepeatFilterOptions* s_MergeRepeatOptions(const SRepeatFilterOptions* opt1, const SRepeatFilterOptions* opt2)
327 {
328 SRepeatFilterOptions* retval = NULL;
329
330 if (!opt1 && !opt2)
331 return NULL;
332
333 SRepeatFilterOptionsNew(&retval);
334
335 if (opt1 && !opt2)
336 {
337 SRepeatFilterOptionsResetDB(&retval, opt1->database);
338 }
339 else if (!opt1 && opt2)
340 {
341 SRepeatFilterOptionsResetDB(&retval, opt2->database);
342 }
343 else
344 { /* TODO : handle different db's. */
345 SRepeatFilterOptionsResetDB(&retval, opt2->database);
346 }
347 return retval;
348 }
349
350 /** Merges together two sets of window masker options, choosing the most non-default one.
351 *
352 * @param opt1 first set to be merged [in]
353 * @param opt2 second set to be merged [in]
354 * @return the merged options.
355 */
356 static SWindowMaskerOptions*
s_MergeWindowMaskerOptions(const SWindowMaskerOptions * opt1,const SWindowMaskerOptions * opt2)357 s_MergeWindowMaskerOptions(const SWindowMaskerOptions* opt1,
358 const SWindowMaskerOptions* opt2)
359 {
360 SWindowMaskerOptions* retval = NULL;
361 const SWindowMaskerOptions* src = NULL;
362 Boolean have1 = FALSE, have2 = FALSE;
363
364 have1 = opt1 && (opt1->database || opt1->taxid);
365 have2 = opt2 && (opt2->database || opt2->taxid);
366
367 if (! (have1 || have2))
368 return NULL;
369
370 if (have1 && ! have2) {
371 src = opt1;
372 } else if (! have1 && have2) {
373 src = opt2;
374 } else {
375 // We have data structures with some kind of content, so
376 // prefer structure 2 as repeat filter options do.
377 src = opt2;
378 }
379
380 ASSERT(src);
381 ASSERT(src->database || src->taxid);
382
383 SWindowMaskerOptionsNew(&retval);
384 SWindowMaskerOptionsResetDB(& retval, src->database);
385 retval->taxid = src->taxid;
386
387 return retval;
388 }
389
SBlastFilterOptionsMerge(SBlastFilterOptions ** combined,const SBlastFilterOptions * opt1,const SBlastFilterOptions * opt2)390 Int2 SBlastFilterOptionsMerge(SBlastFilterOptions** combined, const SBlastFilterOptions* opt1,
391 const SBlastFilterOptions* opt2)
392 {
393 SBlastFilterOptions* retval = NULL;
394 Int2 status = 0;
395
396 *combined = NULL;
397
398 if (opt1 == NULL && opt2 == NULL)
399 return 0;
400
401 status = SBlastFilterOptionsNew(&retval, eEmpty);
402 if (status != 0)
403 return status;
404
405 *combined = retval;
406
407 if ((opt1 && opt1->mask_at_hash) || (opt2 && opt2->mask_at_hash))
408 retval->mask_at_hash = TRUE;
409
410 retval->dustOptions =
411 s_MergeDustOptions(opt1 ? opt1->dustOptions : NULL, opt2 ? opt2->dustOptions : NULL);
412 retval->segOptions =
413 s_MergeSegOptions(opt1 ? opt1->segOptions : NULL, opt2 ? opt2->segOptions : NULL);
414 retval->repeatFilterOptions =
415 s_MergeRepeatOptions(opt1 ? opt1->repeatFilterOptions : NULL, opt2 ? opt2->repeatFilterOptions : NULL);
416 retval->windowMaskerOptions =
417 s_MergeWindowMaskerOptions(opt1 ? opt1->windowMaskerOptions : NULL, opt2 ? opt2->windowMaskerOptions : NULL);
418
419 return 0;
420 }
421
SBlastFilterOptionsNoFiltering(const SBlastFilterOptions * filter_options)422 Boolean SBlastFilterOptionsNoFiltering(const SBlastFilterOptions* filter_options)
423 {
424 if (filter_options == NULL)
425 return TRUE;
426
427 return filter_options->dustOptions == NULL &&
428 filter_options->segOptions == NULL &&
429 filter_options->repeatFilterOptions == NULL &&
430 filter_options->windowMaskerOptions == NULL;
431 }
432
SBlastFilterOptionsMaskAtHash(const SBlastFilterOptions * filter_options)433 Boolean SBlastFilterOptionsMaskAtHash(const SBlastFilterOptions* filter_options)
434 {
435 if (filter_options == NULL)
436 return FALSE;
437
438 return filter_options->mask_at_hash;
439 }
440
SBlastFilterOptionsValidate(EBlastProgramType program_number,const SBlastFilterOptions * filter_options,Blast_Message ** blast_message)441 Int2 SBlastFilterOptionsValidate(EBlastProgramType program_number, const SBlastFilterOptions* filter_options, Blast_Message* *blast_message)
442 {
443 Int2 status = 0;
444
445 if (filter_options == NULL)
446 {
447 Blast_MessageWrite(blast_message, eBlastSevWarning, kBlastMessageNoContext,
448 "SBlastFilterOptionsValidate: NULL filter_options");
449 return BLASTERR_INVALIDPARAM;
450 }
451
452 if (filter_options->repeatFilterOptions)
453 {
454 if (program_number != eBlastTypeBlastn &&
455 program_number != eBlastTypeMapping)
456 {
457 if (blast_message)
458 Blast_MessageWrite(blast_message, eBlastSevError, kBlastMessageNoContext,
459 "SBlastFilterOptionsValidate: Repeat filtering only supported with blastn");
460 return BLASTERR_OPTION_PROGRAM_INVALID;
461 }
462 if (filter_options->repeatFilterOptions->database == NULL ||
463 strlen(filter_options->repeatFilterOptions->database) == 0)
464 {
465 if (blast_message)
466 Blast_MessageWrite(blast_message, eBlastSevError, kBlastMessageNoContext,
467 "SBlastFilterOptionsValidate: No repeat database specified for repeat filtering");
468 return BLASTERR_INVALIDPARAM;
469 }
470 }
471
472 if (filter_options->dustOptions)
473 {
474 if (program_number != eBlastTypeBlastn &&
475 program_number != eBlastTypeMapping)
476 {
477 if (blast_message)
478 Blast_MessageWrite(blast_message, eBlastSevError, kBlastMessageNoContext,
479 "SBlastFilterOptionsValidate: Dust filtering only supported with blastn");
480 return BLASTERR_OPTION_PROGRAM_INVALID;
481 }
482 }
483
484 if (filter_options->segOptions)
485 {
486 if (program_number == eBlastTypeBlastn &&
487 program_number != eBlastTypeMapping)
488 {
489 if (blast_message)
490 Blast_MessageWrite(blast_message, eBlastSevError, kBlastMessageNoContext,
491 "SBlastFilterOptionsValidate: SEG filtering is not supported with blastn");
492 return BLASTERR_OPTION_PROGRAM_INVALID;
493 }
494 }
495
496 return status;
497 }
498
499
500 QuerySetUpOptions*
BlastQuerySetUpOptionsFree(QuerySetUpOptions * options)501 BlastQuerySetUpOptionsFree(QuerySetUpOptions* options)
502
503 {
504 if (options)
505 {
506 sfree(options->filter_string);
507 options->filtering_options = SBlastFilterOptionsFree(options->filtering_options);
508 sfree(options);
509 }
510 return NULL;
511 }
512
513 Int2
BlastQuerySetUpOptionsNew(QuerySetUpOptions ** options)514 BlastQuerySetUpOptionsNew(QuerySetUpOptions* *options)
515 {
516 Int2 status = 0;
517
518 if (options == NULL)
519 return BLASTERR_INVALIDPARAM;
520
521 *options = (QuerySetUpOptions*) calloc(1, sizeof(QuerySetUpOptions));
522
523 if (*options == NULL)
524 return BLASTERR_MEMORY;
525
526 (*options)->genetic_code = BLAST_GENETIC_CODE;
527
528 /** @todo the code below should be deprecated */
529 status = SBlastFilterOptionsNew(&((*options)->filtering_options), eEmpty);
530
531 return status;
532 }
533
BLAST_FillQuerySetUpOptions(QuerySetUpOptions * options,EBlastProgramType program,const char * filter_string,Uint1 strand_option)534 Int2 BLAST_FillQuerySetUpOptions(QuerySetUpOptions* options,
535 EBlastProgramType program, const char *filter_string, Uint1 strand_option)
536 {
537 Int2 status = 0;
538
539 if (options == NULL)
540 return BLASTERR_INVALIDPARAM;
541
542 if (strand_option &&
543 (program == eBlastTypeBlastn || program == eBlastTypePhiBlastn ||
544 program == eBlastTypeBlastx || program == eBlastTypeTblastx ||
545 program == eBlastTypeMapping)) {
546 options->strand_option = strand_option;
547 }
548
549 if (filter_string) {
550 /* Free whatever filter string has been set before. */
551 sfree(options->filter_string);
552 /* Free whatever filtering options have been set. */
553 options->filtering_options = SBlastFilterOptionsFree(options->filtering_options);
554 /* Parse the filter_string for options, do not save the string. */
555 status = BlastFilteringOptionsFromString(program, filter_string,
556 &options->filtering_options, NULL);
557 }
558 return status;
559 }
560
561 BlastInitialWordOptions*
BlastInitialWordOptionsFree(BlastInitialWordOptions * options)562 BlastInitialWordOptionsFree(BlastInitialWordOptions* options)
563
564 {
565
566 sfree(options);
567
568 return NULL;
569 }
570
571
572 Int2
BlastInitialWordOptionsNew(EBlastProgramType program,BlastInitialWordOptions ** options)573 BlastInitialWordOptionsNew(EBlastProgramType program,
574 BlastInitialWordOptions* *options)
575 {
576 *options =
577 (BlastInitialWordOptions*) calloc(1, sizeof(BlastInitialWordOptions));
578 if (*options == NULL)
579 return BLASTERR_MEMORY;
580
581 if (/*program != eBlastTypeBlastn &&
582 program != eBlastTypePhiBlastn */
583 !Blast_ProgramIsNucleotide(program)) { /* protein-protein options. */
584 (*options)->window_size = BLAST_WINDOW_SIZE_PROT;
585 (*options)->x_dropoff = BLAST_UNGAPPED_X_DROPOFF_PROT;
586 (*options)->gap_trigger = BLAST_GAP_TRIGGER_PROT;
587 } else {
588 (*options)->window_size = BLAST_WINDOW_SIZE_NUCL;
589 (*options)->scan_range = BLAST_SCAN_RANGE_NUCL;
590 (*options)->gap_trigger = BLAST_GAP_TRIGGER_NUCL;
591 (*options)->x_dropoff = BLAST_UNGAPPED_X_DROPOFF_NUCL;
592 }
593
594 (*options)->program_number = program;
595
596 return 0;
597 }
598
599
600 Int2
BlastInitialWordOptionsValidate(EBlastProgramType program_number,const BlastInitialWordOptions * options,Blast_Message ** blast_msg)601 BlastInitialWordOptionsValidate(EBlastProgramType program_number,
602 const BlastInitialWordOptions* options,
603 Blast_Message* *blast_msg)
604 {
605
606 ASSERT(options);
607
608 /* PHI-BLAST has no ungapped extension phase. Megablast may not have it,
609 but generally does now. */
610 if (program_number != eBlastTypeBlastn &&
611 program_number != eBlastTypeMapping &&
612 (!Blast_ProgramIsPhiBlast(program_number)) &&
613 options->x_dropoff <= 0.0)
614 {
615 Blast_MessageWrite(blast_msg, eBlastSevError, kBlastMessageNoContext,
616 "x_dropoff must be greater than zero");
617 return BLASTERR_OPTION_VALUE_INVALID;
618 }
619
620 if (program_number == eBlastTypeBlastn &&
621 options->scan_range && !options->window_size)
622 {
623 Blast_MessageWrite(blast_msg, eBlastSevError, kBlastMessageNoContext,
624 "off_diagonal_range is only useful in 2-hit algorithm");
625 return BLASTERR_OPTION_VALUE_INVALID;
626 }
627
628
629 return 0;
630 }
631
632
633 Int2
BLAST_FillInitialWordOptions(BlastInitialWordOptions * options,EBlastProgramType program,Int4 window_size,double xdrop_ungapped)634 BLAST_FillInitialWordOptions(BlastInitialWordOptions* options,
635 EBlastProgramType program, Int4 window_size,
636 double xdrop_ungapped)
637 {
638 if (!options)
639 return BLASTERR_INVALIDPARAM;
640
641 if (window_size != 0)
642 options->window_size = window_size;
643 if (xdrop_ungapped != 0)
644 options->x_dropoff = xdrop_ungapped;
645
646 return 0;
647 }
648
649 BlastExtensionOptions*
BlastExtensionOptionsFree(BlastExtensionOptions * options)650 BlastExtensionOptionsFree(BlastExtensionOptions* options)
651
652 {
653
654 sfree(options);
655
656 return NULL;
657 }
658
659 Int2
BlastExtensionOptionsNew(EBlastProgramType program,BlastExtensionOptions ** options,Boolean gapped)660 BlastExtensionOptionsNew(EBlastProgramType program, BlastExtensionOptions* *options, Boolean gapped)
661
662 {
663 *options = (BlastExtensionOptions*)
664 calloc(1, sizeof(BlastExtensionOptions));
665
666 if (*options == NULL)
667 return BLASTERR_MEMORY;
668
669 if (/* program != eBlastTypeBlastn &&
670 program != eBlastTypePhiBlastn*/
671 !Blast_ProgramIsNucleotide(program)) /* protein-protein options. */
672 {
673 (*options)->gap_x_dropoff = BLAST_GAP_X_DROPOFF_PROT;
674 (*options)->gap_x_dropoff_final =
675 BLAST_GAP_X_DROPOFF_FINAL_PROT;
676 } else {
677 (*options)->gap_x_dropoff = BLAST_GAP_X_DROPOFF_NUCL;
678 (*options)->gap_x_dropoff_final = BLAST_GAP_X_DROPOFF_FINAL_NUCL;
679 }
680
681 (*options)->ePrelimGapExt = eDynProgScoreOnly;
682 (*options)->eTbackExt = eDynProgTbck;
683 (*options)->compositionBasedStats = eNoCompositionBasedStats;
684
685 /** @todo how to determine this for PSI-BLAST bootstrap run (i.e. when
686 * program is blastp? */
687 if (gapped && (Blast_QueryIsPssm(program) && ! Blast_SubjectIsTranslated(program))) {
688 (*options)->compositionBasedStats = eCompositionBasedStats;
689 }
690
691 (*options)->max_mismatches = 5;
692 (*options)->mismatch_window = 10;
693 (*options)->program_number = program;
694
695 return 0;
696 }
697
698 Int2
BLAST_FillExtensionOptions(BlastExtensionOptions * options,EBlastProgramType program,Int4 greedy,double x_dropoff,double x_dropoff_final)699 BLAST_FillExtensionOptions(BlastExtensionOptions* options,
700 EBlastProgramType program, Int4 greedy, double x_dropoff,
701 double x_dropoff_final)
702 {
703 if (!options)
704 return BLASTERR_INVALIDPARAM;
705
706 if (/*program == eBlastTypeBlastn || program == eBlastTypePhiBlastn*/
707 Blast_ProgramIsNucleotide(program)) {
708 if (greedy) {
709 options->gap_x_dropoff = BLAST_GAP_X_DROPOFF_GREEDY;
710 options->gap_x_dropoff_final = BLAST_GAP_X_DROPOFF_FINAL_NUCL;
711 options->ePrelimGapExt = eGreedyScoreOnly;
712 options->eTbackExt = eGreedyTbck;
713 } else {
714 options->gap_x_dropoff = BLAST_GAP_X_DROPOFF_NUCL;
715 options->gap_x_dropoff_final = BLAST_GAP_X_DROPOFF_FINAL_NUCL;
716 options->ePrelimGapExt = eDynProgScoreOnly;
717 options->eTbackExt = eDynProgTbck;
718 }
719 }
720
721 if (Blast_QueryIsPssm(program) && ! Blast_SubjectIsTranslated(program)) {
722 options->compositionBasedStats = eCompositionBasedStats;
723 }
724
725 if (x_dropoff)
726 options->gap_x_dropoff = x_dropoff;
727 if (x_dropoff_final) {
728 options->gap_x_dropoff_final = x_dropoff_final;
729 } else {
730 /* Final X-dropoff can't be smaller than preliminary X-dropoff */
731 options->gap_x_dropoff_final =
732 MAX(options->gap_x_dropoff_final, x_dropoff);
733 }
734
735 return 0;
736
737 }
738
739 Int2
BlastExtensionOptionsValidate(EBlastProgramType program_number,const BlastExtensionOptions * options,Blast_Message ** blast_msg)740 BlastExtensionOptionsValidate(EBlastProgramType program_number,
741 const BlastExtensionOptions* options, Blast_Message* *blast_msg)
742
743 {
744 if (options == NULL)
745 return BLASTERR_INVALIDPARAM;
746
747 if (program_number != eBlastTypeBlastn &&
748 program_number != eBlastTypeMapping &&
749 (options->ePrelimGapExt == eGreedyScoreOnly ||
750 options->eTbackExt == eGreedyTbck))
751 {
752 Blast_MessageWrite(blast_msg, eBlastSevWarning,
753 kBlastMessageNoContext,
754 "Greedy extension only supported for BLASTN");
755 return BLASTERR_OPTION_PROGRAM_INVALID;
756 }
757
758 if ((options->ePrelimGapExt == eSmithWatermanScoreOnly &&
759 options->eTbackExt != eSmithWatermanTbckFull) ||
760 (options->ePrelimGapExt != eSmithWatermanScoreOnly &&
761 options->eTbackExt == eSmithWatermanTbckFull))
762 {
763 Blast_MessageWrite(blast_msg, eBlastSevWarning,
764 kBlastMessageNoContext,
765 "Score-only and traceback Smith-Waterman must "
766 "both be specified");
767 return BLASTERR_OPTION_VALUE_INVALID;
768 }
769
770 return 0;
771 }
772
773 BlastScoringOptions*
BlastScoringOptionsFree(BlastScoringOptions * options)774 BlastScoringOptionsFree(BlastScoringOptions* options)
775
776 {
777 if (options == NULL)
778 return NULL;
779
780 sfree(options->matrix);
781 sfree(options->matrix_path);
782 sfree(options);
783
784 return NULL;
785 }
786
787 Int2
BlastScoringOptionsNew(EBlastProgramType program_number,BlastScoringOptions ** options)788 BlastScoringOptionsNew(EBlastProgramType program_number, BlastScoringOptions* *options)
789 {
790 *options = (BlastScoringOptions*) calloc(1, sizeof(BlastScoringOptions));
791
792 if (*options == NULL)
793 return BLASTERR_INVALIDPARAM;
794
795 if (/*program_number != eBlastTypeBlastn &&
796 program_number != eBlastTypePhiBlastn*/
797 !Blast_ProgramIsNucleotide(program_number)) {/*protein-protein options.*/
798 (*options)->shift_pen = INT2_MAX;
799 (*options)->is_ooframe = FALSE;
800 (*options)->gap_open = BLAST_GAP_OPEN_PROT;
801 (*options)->gap_extend = BLAST_GAP_EXTN_PROT;
802 (*options)->matrix = strdup(BLAST_DEFAULT_MATRIX);
803 } else { /* nucleotide-nucleotide options. */
804 (*options)->penalty = BLAST_PENALTY;
805 (*options)->reward = BLAST_REWARD;
806 /* This is correct except when greedy extension is used. In that case
807 these values would have to be reset. */
808 (*options)->gap_open = BLAST_GAP_OPEN_NUCL;
809 (*options)->gap_extend = BLAST_GAP_EXTN_NUCL;
810 }
811 if (program_number != eBlastTypeTblastx) {
812 (*options)->gapped_calculation = TRUE;
813 }
814 (*options)->program_number = program_number;
815 /* By default cross_match-like complexity adjusted scoring is
816 turned off. RMBlastN is currently the only program to use this. -RMH */
817 (*options)->complexity_adjusted_scoring = FALSE;
818
819 return 0;
820 }
821
822 Int2
BLAST_FillScoringOptions(BlastScoringOptions * options,EBlastProgramType program_number,Boolean greedy_extension,Int4 penalty,Int4 reward,const char * matrix,Int4 gap_open,Int4 gap_extend)823 BLAST_FillScoringOptions(BlastScoringOptions* options,
824 EBlastProgramType program_number, Boolean greedy_extension, Int4 penalty, Int4 reward,
825 const char *matrix, Int4 gap_open, Int4 gap_extend)
826 {
827 if (!options)
828 return BLASTERR_INVALIDPARAM;
829
830 if (/*program_number != eBlastTypeBlastn &&
831 program_number != eBlastTypePhiBlastn*/
832 !Blast_ProgramIsNucleotide(program_number)) {/* protein-protein options. */
833 /* If matrix name is not provided, keep the default "BLOSUM62" value filled in
834 BlastScoringOptionsNew, otherwise reset it. */
835 if (matrix)
836 BlastScoringOptionsSetMatrix(options, matrix);
837 } else { /* nucleotide-nucleotide options. */
838 if (penalty)
839 options->penalty = penalty;
840 if (reward)
841 options->reward = reward;
842
843 if (greedy_extension) {
844 options->gap_open = BLAST_GAP_OPEN_MEGABLAST;
845 options->gap_extend = BLAST_GAP_EXTN_MEGABLAST;
846 } else {
847 options->gap_open = BLAST_GAP_OPEN_NUCL;
848 options->gap_extend = BLAST_GAP_EXTN_NUCL;
849 }
850 }
851 if (gap_open >= 0)
852 options->gap_open = gap_open;
853 if (gap_extend >= 0)
854 options->gap_extend = gap_extend;
855
856 options->program_number = program_number;
857
858 return 0;
859 }
860
861 Int2
BlastScoringOptionsValidate(EBlastProgramType program_number,const BlastScoringOptions * options,Blast_Message ** blast_msg)862 BlastScoringOptionsValidate(EBlastProgramType program_number,
863 const BlastScoringOptions* options, Blast_Message* *blast_msg)
864
865 {
866 if (options == NULL)
867 return BLASTERR_INVALIDPARAM;
868
869 if (program_number == eBlastTypeTblastx && options->gapped_calculation)
870 {
871 Blast_MessageWrite(blast_msg, eBlastSevError, kBlastMessageNoContext,
872 "Gapped search is not allowed for tblastx");
873 return BLASTERR_OPTION_PROGRAM_INVALID;
874 }
875
876 if (/*program_number == eBlastTypeBlastn || program_number == eBlastTypePhiBlastn*/
877 Blast_ProgramIsNucleotide(program_number))
878 {
879 // A penalty/reward of 0/0 is a signal that this is rmblastn
880 // which allows specification of penalties as positive integers.
881 if ( ! ( options->penalty == 0 && options->reward == 0 ) )
882 {
883 if (options->penalty >= 0)
884 {
885 Blast_MessageWrite(blast_msg, eBlastSevWarning, kBlastMessageNoContext,
886 "BLASTN penalty must be negative");
887 return BLASTERR_OPTION_VALUE_INVALID;
888 }
889
890 /* !!! this is temporary until there is jumper or mapping options handle */
891 if (0 && options->gapped_calculation &&
892 !Blast_ProgramIsMapping(program_number) &&
893 !BLAST_CheckRewardPenaltyScores(options->reward, options->penalty))
894 {
895 Blast_MessageWrite(blast_msg, eBlastSevWarning, kBlastMessageNoContext,
896 "BLASTN reward/penalty combination not supported for gapped search");
897 return BLASTERR_OPTION_VALUE_INVALID;
898 }
899 }
900
901 if (options->gapped_calculation && options->gap_open > 0 && options->gap_extend == 0)
902 {
903 Blast_MessageWrite(blast_msg, eBlastSevWarning, kBlastMessageNoContext,
904 "BLASTN gap extension penalty cannot be 0");
905 return BLASTERR_OPTION_VALUE_INVALID;
906 }
907 }
908 else
909 {
910 if (options->gapped_calculation && !Blast_ProgramIsRpsBlast(program_number))
911 {
912 Int2 status=0;
913 Boolean std_matrix_only =
914 (program_number != eBlastTypeBlastp &&
915 program_number != eBlastTypeTblastn);
916 if ((status=Blast_KarlinBlkGappedLoadFromTables(NULL, options->gap_open,
917 options->gap_extend, options->matrix, std_matrix_only)) != 0)
918 {
919 if (status == 1)
920 {
921 char* buffer;
922
923 buffer = BLAST_PrintMatrixMessage(options->matrix,
924 std_matrix_only);
925
926 Blast_MessageWrite(blast_msg, eBlastSevError, kBlastMessageNoContext, buffer);
927 sfree(buffer);
928 return BLASTERR_OPTION_VALUE_INVALID;
929
930 }
931 else if (status == 2)
932 {
933 char* buffer;
934
935 buffer = BLAST_PrintAllowedValues(options->matrix,
936 options->gap_open, options->gap_extend);
937 Blast_MessageWrite(blast_msg, eBlastSevError, kBlastMessageNoContext, buffer);
938 sfree(buffer);
939 return BLASTERR_OPTION_VALUE_INVALID;
940 }
941 }
942 }
943 }
944
945 if (program_number != eBlastTypeBlastx && program_number != eBlastTypeTblastn && options->is_ooframe)
946 {
947 Blast_MessageWrite(blast_msg, eBlastSevWarning, kBlastMessageNoContext,
948 "Out-of-frame only permitted for blastx and tblastn");
949 return BLASTERR_OPTION_PROGRAM_INVALID;
950 }
951
952 return 0;
953 }
954
955 Int2
BlastScoringOptionsDup(BlastScoringOptions ** new_opt,const BlastScoringOptions * old_opt)956 BlastScoringOptionsDup(BlastScoringOptions* *new_opt, const BlastScoringOptions* old_opt)
957 {
958 if (old_opt == NULL || new_opt == NULL)
959 return BLASTERR_INVALIDPARAM;
960
961 *new_opt = (BlastScoringOptions*) BlastMemDup(old_opt, sizeof(BlastScoringOptions));
962 if (*new_opt == NULL)
963 return BLASTERR_MEMORY;
964
965 if (old_opt->matrix)
966 (*new_opt)->matrix = strdup(old_opt->matrix);
967
968 if (old_opt->matrix_path)
969 (*new_opt)->matrix_path = strdup(old_opt->matrix_path);
970
971 return 0;
972 }
973
BlastScoringOptionsSetMatrix(BlastScoringOptions * opts,const char * matrix_name)974 Int2 BlastScoringOptionsSetMatrix(BlastScoringOptions* opts,
975 const char* matrix_name)
976 {
977 Uint4 i;
978
979 if (matrix_name) {
980 sfree(opts->matrix);
981 opts->matrix = strdup(matrix_name);
982 /* Make it all upper case */
983 for (i=0; i<strlen(opts->matrix); ++i)
984 opts->matrix[i] = toupper((unsigned char) opts->matrix[i]);
985 }
986 return 0;
987 }
988
989 BlastEffectiveLengthsOptions*
BlastEffectiveLengthsOptionsFree(BlastEffectiveLengthsOptions * options)990 BlastEffectiveLengthsOptionsFree(BlastEffectiveLengthsOptions* options)
991
992 {
993 if (options == NULL)
994 return NULL;
995
996 sfree(options->searchsp_eff);
997 sfree(options);
998 return NULL;
999 }
1000
1001
1002 Int2
BlastEffectiveLengthsOptionsNew(BlastEffectiveLengthsOptions ** options)1003 BlastEffectiveLengthsOptionsNew(BlastEffectiveLengthsOptions* *options)
1004
1005 {
1006 if (options == NULL) {
1007 return BLASTERR_INVALIDPARAM;
1008 }
1009
1010 *options = (BlastEffectiveLengthsOptions*)
1011 calloc(1, sizeof(BlastEffectiveLengthsOptions));
1012
1013 if (*options == NULL)
1014 return BLASTERR_MEMORY;
1015
1016 return 0;
1017 }
1018
1019 Boolean
BlastEffectiveLengthsOptions_IsSearchSpaceSet(const BlastEffectiveLengthsOptions * options)1020 BlastEffectiveLengthsOptions_IsSearchSpaceSet(const
1021 BlastEffectiveLengthsOptions*
1022 options)
1023 {
1024 int i;
1025 if ( !options || options->searchsp_eff == NULL) {
1026 return FALSE;
1027 }
1028
1029 for (i = 0; i < options->num_searchspaces; i++) {
1030 if (options->searchsp_eff[i] != 0) {
1031 return TRUE;
1032 }
1033 }
1034 return FALSE;
1035 }
1036
1037 Int2
BLAST_FillEffectiveLengthsOptions(BlastEffectiveLengthsOptions * options,Int4 dbseq_num,Int8 db_length,Int8 * searchsp_eff,Int4 num_searchsp)1038 BLAST_FillEffectiveLengthsOptions(BlastEffectiveLengthsOptions* options,
1039 Int4 dbseq_num, Int8 db_length, Int8* searchsp_eff, Int4 num_searchsp)
1040 {
1041 Int4 index;
1042 if (!options)
1043 return BLASTERR_INVALIDPARAM;
1044
1045 if (num_searchsp > options->num_searchspaces) {
1046 options->num_searchspaces = num_searchsp;
1047 options->searchsp_eff = (Int8 *)realloc(options->searchsp_eff,
1048 num_searchsp * sizeof(Int8));
1049 if (options->searchsp_eff == NULL)
1050 return BLASTERR_MEMORY;
1051 }
1052
1053 for (index = 0; index < options->num_searchspaces; index++)
1054 options->searchsp_eff[index] = searchsp_eff[index];
1055
1056 options->dbseq_num = dbseq_num;
1057 options->db_length = db_length;
1058
1059 return 0;
1060 }
1061
1062 LookupTableOptions*
LookupTableOptionsFree(LookupTableOptions * options)1063 LookupTableOptionsFree(LookupTableOptions* options)
1064
1065 {
1066
1067 if (options == NULL)
1068 return NULL;
1069
1070 sfree(options->phi_pattern);
1071
1072 sfree(options);
1073 return NULL;
1074 }
1075
1076 Int2
LookupTableOptionsNew(EBlastProgramType program_number,LookupTableOptions ** options)1077 LookupTableOptionsNew(EBlastProgramType program_number, LookupTableOptions* *options)
1078 {
1079 *options = (LookupTableOptions*) calloc(1, sizeof(LookupTableOptions));
1080
1081 if (*options == NULL)
1082 return BLASTERR_INVALIDPARAM;
1083
1084 switch (program_number) {
1085 case eBlastTypeMapping:
1086 (*options)->max_db_word_count = MAX_DB_WORD_COUNT_MAPPER;
1087 case eBlastTypeBlastn:
1088 /* Blastn default is megablast. */
1089 (*options)->word_size = BLAST_WORDSIZE_MEGABLAST;
1090 (*options)->lut_type = eMBLookupTable;
1091 break;
1092 case eBlastTypeRpsBlast: case eBlastTypeRpsTblastn:
1093 (*options)->word_size = BLAST_WORDSIZE_PROT;
1094 (*options)->lut_type = eRPSLookupTable;
1095
1096 if (program_number == eBlastTypeRpsBlast)
1097 (*options)->threshold = BLAST_WORD_THRESHOLD_BLASTP;
1098 else
1099 (*options)->threshold = BLAST_WORD_THRESHOLD_TBLASTN;
1100 break;
1101 case eBlastTypePhiBlastn:
1102 (*options)->lut_type = ePhiNaLookupTable;
1103 break;
1104 case eBlastTypePhiBlastp:
1105 (*options)->lut_type = ePhiLookupTable;
1106 break;
1107 default:
1108 (*options)->word_size = BLAST_WORDSIZE_PROT;
1109 (*options)->lut_type = eAaLookupTable;
1110
1111 if (program_number == eBlastTypeBlastp)
1112 (*options)->threshold = BLAST_WORD_THRESHOLD_BLASTP;
1113 else if (program_number == eBlastTypeBlastx)
1114 (*options)->threshold = BLAST_WORD_THRESHOLD_BLASTX;
1115 else if (program_number == eBlastTypeTblastn)
1116 (*options)->threshold = BLAST_WORD_THRESHOLD_TBLASTN;
1117 else if (program_number == eBlastTypeTblastx)
1118 (*options)->threshold = BLAST_WORD_THRESHOLD_TBLASTX;
1119 break;
1120 }
1121
1122 (*options)->program_number = program_number;
1123 (*options)->stride = 0;
1124
1125 return 0;
1126 }
1127
1128 Int2
BLAST_FillLookupTableOptions(LookupTableOptions * options,EBlastProgramType program_number,Boolean is_megablast,double threshold,Int4 word_size)1129 BLAST_FillLookupTableOptions(LookupTableOptions* options,
1130 EBlastProgramType program_number, Boolean is_megablast,
1131 double threshold, Int4 word_size)
1132 {
1133 if (!options)
1134 return BLASTERR_INVALIDPARAM;
1135
1136 if (program_number == eBlastTypeBlastn) {
1137
1138 if (is_megablast) {
1139 options->lut_type = eMBLookupTable;
1140 options->word_size = BLAST_WORDSIZE_MEGABLAST;
1141 } else {
1142 options->lut_type = eNaLookupTable;
1143 options->word_size = BLAST_WORDSIZE_NUCL;
1144 }
1145 } else if (program_number == eBlastTypeMapping) {
1146 options->lut_type = eNaHashLookupTable;
1147 options->word_size = BLAST_WORDSIZE_MAPPER;
1148 options->max_db_word_count = MAX_DB_WORD_COUNT_MAPPER;
1149 } else {
1150 options->lut_type = eAaLookupTable;
1151 }
1152
1153 /* if the supplied threshold is negative, disable neighboring words */
1154 if (threshold < 0)
1155 options->threshold = 0;
1156
1157 /* if the supplied threshold is > 0, use it otherwise, use the default */
1158 if (threshold > 0)
1159 options->threshold = threshold;
1160
1161 if (Blast_ProgramIsRpsBlast(program_number))
1162 options->lut_type = eRPSLookupTable;
1163 if (word_size)
1164 options->word_size = word_size;
1165 if ((program_number == eBlastTypeTblastn ||
1166 program_number == eBlastTypeBlastp ||
1167 program_number == eBlastTypeBlastx) &&
1168 word_size > 5)
1169 options->lut_type = eCompressedAaLookupTable;
1170
1171 return 0;
1172 }
1173
BLAST_GetSuggestedThreshold(EBlastProgramType program_number,const char * matrixName,double * threshold)1174 Int2 BLAST_GetSuggestedThreshold(EBlastProgramType program_number, const char* matrixName, double* threshold)
1175 {
1176
1177 const double kB62_threshold = 11;
1178
1179 if (program_number == eBlastTypeBlastn ||
1180 program_number == eBlastTypeMapping)
1181 return 0;
1182
1183 if (matrixName == NULL)
1184 return BLASTERR_INVALIDPARAM;
1185
1186 if(strcasecmp(matrixName, "BLOSUM62") == 0)
1187 *threshold = kB62_threshold;
1188 else if(strcasecmp(matrixName, "BLOSUM45") == 0)
1189 *threshold = 14;
1190 else if(strcasecmp(matrixName, "BLOSUM62_20") == 0)
1191 *threshold = 100;
1192 else if(strcasecmp(matrixName, "BLOSUM80") == 0)
1193 *threshold = 12;
1194 else if(strcasecmp(matrixName, "PAM30") == 0)
1195 *threshold = 16;
1196 else if(strcasecmp(matrixName, "PAM70") == 0)
1197 *threshold = 14;
1198 else if(strcasecmp(matrixName, "IDENTITY") == 0)
1199 *threshold = 27;
1200 else
1201 *threshold = kB62_threshold;
1202
1203 if (Blast_SubjectIsTranslated(program_number) == TRUE)
1204 *threshold += 2; /* Covers tblastn, tblastx, psi-tblastn rpstblastn. */
1205 else if (Blast_QueryIsTranslated(program_number) == TRUE)
1206 *threshold += 1;
1207
1208 return 0;
1209 }
1210
BLAST_GetSuggestedWindowSize(EBlastProgramType program_number,const char * matrixName,Int4 * window_size)1211 Int2 BLAST_GetSuggestedWindowSize(EBlastProgramType program_number, const char* matrixName, Int4* window_size)
1212 {
1213 const Int4 kB62_windowsize = 40;
1214
1215 if (program_number == eBlastTypeBlastn ||
1216 program_number == eBlastTypeMapping)
1217 return 0;
1218
1219 if (matrixName == NULL)
1220 return BLASTERR_INVALIDPARAM;
1221
1222 if(strcasecmp(matrixName, "BLOSUM62") == 0)
1223 *window_size = kB62_windowsize;
1224 else if(strcasecmp(matrixName, "BLOSUM45") == 0)
1225 *window_size = 60;
1226 else if(strcasecmp(matrixName, "BLOSUM80") == 0)
1227 *window_size = 25;
1228 else if(strcasecmp(matrixName, "PAM30") == 0)
1229 *window_size = 15;
1230 else if(strcasecmp(matrixName, "PAM70") == 0)
1231 *window_size = 20;
1232 else
1233 *window_size = kB62_windowsize;
1234
1235 return 0;
1236 }
1237
1238 /** Validate options for the discontiguous word megablast
1239 * Word size must be 11 or 12; template length 16, 18 or 21;
1240 * template type 0, 1 or 2.
1241 * @param word_size Word size option [in]
1242 * @param template_length Discontiguous template length [in]
1243 * @param template_type Discontiguous template type [in]
1244 * @param blast_msg Used for storing error messages [in][out]
1245 * @return TRUE if options combination valid.
1246 */
1247 static Boolean
s_DiscWordOptionsValidate(Int4 word_size,Uint1 template_length,Uint1 template_type,Blast_Message ** blast_msg)1248 s_DiscWordOptionsValidate(Int4 word_size, Uint1 template_length,
1249 Uint1 template_type,
1250 Blast_Message** blast_msg)
1251 {
1252 if (template_length == 0)
1253 return TRUE;
1254
1255
1256 if (word_size != 11 && word_size != 12) {
1257 Blast_MessageWrite(blast_msg, eBlastSevError, kBlastMessageNoContext,
1258 "Invalid discontiguous template parameters: word "
1259 "size must be either 11 or 12");
1260 return FALSE;
1261 }
1262
1263 if (template_length != 16 && template_length != 18 &&
1264 template_length != 21) {
1265 Blast_MessageWrite(blast_msg, eBlastSevError, kBlastMessageNoContext,
1266 "Invalid discontiguous template parameters: "
1267 "template length must be 16, 18, or 21");
1268 return FALSE;
1269 }
1270
1271 if (template_type > 2) {
1272 /* should never fail coming from the C++ APIs as we represent these as
1273 * strings */
1274 Blast_MessageWrite(blast_msg, eBlastSevError, kBlastMessageNoContext,
1275 "Invalid discontiguous template parameters: "
1276 "template type must be 0, 1, or 2");
1277 return FALSE;
1278 }
1279
1280 return TRUE;
1281 }
1282
1283 Int2
LookupTableOptionsValidate(EBlastProgramType program_number,const LookupTableOptions * options,Blast_Message ** blast_msg)1284 LookupTableOptionsValidate(EBlastProgramType program_number,
1285 const LookupTableOptions* options, Blast_Message* *blast_msg)
1286
1287 {
1288 const Boolean kPhiBlast = Blast_ProgramIsPhiBlast(program_number);
1289
1290 if (options == NULL)
1291 return BLASTERR_INVALIDPARAM;
1292
1293 if (options->phi_pattern && !kPhiBlast) {
1294 Blast_MessageWrite(blast_msg, eBlastSevError, kBlastMessageNoContext,
1295 "PHI pattern can be specified only for blastp and blastn");
1296 return BLASTERR_OPTION_PROGRAM_INVALID;
1297 }
1298
1299 /* For PHI BLAST, the subsequent word size tests are not needed. */
1300 if (kPhiBlast)
1301 return 0;
1302
1303 if (program_number != eBlastTypeBlastn &&
1304 program_number != eBlastTypeMapping &&
1305 (!Blast_ProgramIsRpsBlast(program_number)) &&
1306 options->threshold <= 0)
1307 {
1308 Blast_MessageWrite(blast_msg, eBlastSevError, kBlastMessageNoContext,
1309 "Non-zero threshold required");
1310 return BLASTERR_OPTION_VALUE_INVALID;
1311 }
1312
1313 if (options->word_size <= 0)
1314 {
1315 if ( !Blast_ProgramIsRpsBlast(program_number)) {
1316 Blast_MessageWrite(blast_msg, eBlastSevError, kBlastMessageNoContext,
1317 "Word-size must be greater than zero");
1318 return BLASTERR_OPTION_VALUE_INVALID;
1319 }
1320 } else if (/*program_number == eBlastTypeBlastn*/
1321 Blast_ProgramIsNucleotide(program_number) &&
1322 !Blast_QueryIsPattern(program_number) && options->word_size < 4)
1323 {
1324 Blast_MessageWrite(blast_msg, eBlastSevError, kBlastMessageNoContext,
1325 "Word-size must be 4 or greater for nucleotide comparison");
1326 return BLASTERR_OPTION_VALUE_INVALID;
1327 } else if (program_number != eBlastTypeBlastn &&
1328 program_number != eBlastTypeMapping && options->word_size > 5)
1329 {
1330 if (program_number == eBlastTypeBlastp ||
1331 program_number == eBlastTypeTblastn ||
1332 program_number == eBlastTypeBlastx)
1333 {
1334 if (options->word_size > 7) {
1335 Blast_MessageWrite(blast_msg, eBlastSevError,
1336 kBlastMessageNoContext,
1337 "Word-size must be less than "
1338 "8 for a tblastn, blastp or blastx search");
1339 return BLASTERR_OPTION_VALUE_INVALID;
1340 }
1341 }
1342 else {
1343 Blast_MessageWrite(blast_msg, eBlastSevError,
1344 kBlastMessageNoContext,
1345 "Word-size must be less "
1346 "than 6 for protein comparison");
1347 return BLASTERR_OPTION_VALUE_INVALID;
1348 }
1349 }
1350
1351 if (program_number != eBlastTypeBlastn &&
1352 program_number != eBlastTypeMapping &&
1353 options->lut_type == eMBLookupTable)
1354 {
1355 Blast_MessageWrite(blast_msg, eBlastSevError, kBlastMessageNoContext,
1356 "Megablast lookup table only supported with blastn");
1357 return BLASTERR_OPTION_PROGRAM_INVALID;
1358 }
1359
1360 if (program_number == eBlastTypeBlastp ||
1361 program_number == eBlastTypeTblastn ||
1362 program_number == eBlastTypeBlastx)
1363 {
1364 if (options->word_size > 5 &&
1365 options->lut_type != eCompressedAaLookupTable) {
1366 Blast_MessageWrite(blast_msg, eBlastSevError,
1367 kBlastMessageNoContext,
1368 "Blastp, Blastx or Tblastn with word size"
1369 " > 5 requires a "
1370 "compressed alphabet lookup table");
1371 return BLASTERR_OPTION_VALUE_INVALID;
1372 }
1373 else if (options->lut_type == eCompressedAaLookupTable &&
1374 options->word_size != 6 && options->word_size != 7) {
1375 Blast_MessageWrite(blast_msg, eBlastSevError, kBlastMessageNoContext,
1376 "Compressed alphabet lookup table requires "
1377 "word size 6 or 7");
1378 return BLASTERR_OPTION_VALUE_INVALID;
1379 }
1380 }
1381
1382 if (/*program_number == eBlastTypeBlastn &&*/
1383 Blast_ProgramIsNucleotide(program_number) &&
1384 !Blast_QueryIsPattern(program_number) &&
1385 options->mb_template_length > 0) {
1386 if (!s_DiscWordOptionsValidate(options->word_size,
1387 options->mb_template_length,
1388 options->mb_template_type,
1389 blast_msg)) {
1390 return BLASTERR_OPTION_VALUE_INVALID;
1391 } else if (options->lut_type != eMBLookupTable) {
1392 Blast_MessageWrite(blast_msg, eBlastSevError, kBlastMessageNoContext,
1393 "Invalid lookup table type for discontiguous Mega BLAST");
1394 return BLASTERR_OPTION_VALUE_INVALID;
1395 }
1396 }
1397
1398 if (!Blast_ProgramIsNucleotide(program_number) && options->db_filter) {
1399 Blast_MessageWrite(blast_msg, eBlastSevError, kBlastMessageNoContext,
1400 "The limit_lookup option can only be used for "
1401 "nucleotide searches");
1402 return BLASTERR_OPTION_VALUE_INVALID;
1403 }
1404
1405 if (options->db_filter && options->word_size < 16) {
1406 Blast_MessageWrite(blast_msg, eBlastSevError, kBlastMessageNoContext,
1407 "The limit_lookup option can only be used with "
1408 "word size >= 16");
1409 return BLASTERR_OPTION_VALUE_INVALID;
1410 }
1411
1412 return 0;
1413 }
1414
1415 BlastHitSavingOptions*
BlastHitSavingOptionsFree(BlastHitSavingOptions * options)1416 BlastHitSavingOptionsFree(BlastHitSavingOptions* options)
1417
1418 {
1419 if (options) {
1420 options->hsp_filt_opt = BlastHSPFilteringOptionsFree(options->hsp_filt_opt);
1421 }
1422 sfree(options);
1423 return NULL;
1424 }
1425
1426
BlastHitSavingOptionsNew(EBlastProgramType program_number,BlastHitSavingOptions ** options,Boolean gapped_calculation)1427 Int2 BlastHitSavingOptionsNew(EBlastProgramType program_number,
1428 BlastHitSavingOptions** options,
1429 Boolean gapped_calculation)
1430 {
1431 *options = (BlastHitSavingOptions*) calloc(1, sizeof(BlastHitSavingOptions));
1432
1433 if (*options == NULL)
1434 return BLASTERR_INVALIDPARAM;
1435
1436 (*options)->hitlist_size = BLAST_HITLIST_SIZE;
1437 (*options)->expect_value = BLAST_EXPECT_VALUE;
1438 (*options)->program_number = program_number;
1439
1440 // Initialize mask_level parameter -RMH-
1441 (*options)->mask_level = 101;
1442
1443 /* By default, sum statistics is used for all translated searches
1444 * (except RPS BLAST), and for all ungapped searches.
1445 */
1446 if (program_number == eBlastTypeRpsTblastn) {
1447 (*options)->do_sum_stats = FALSE;
1448 } else if (!gapped_calculation ||
1449 Blast_QueryIsTranslated(program_number) ||
1450 Blast_SubjectIsTranslated(program_number)) {
1451 (*options)->do_sum_stats = TRUE;
1452 } else {
1453 (*options)->do_sum_stats = FALSE;
1454 }
1455
1456 (*options)->hsp_filt_opt = NULL;
1457
1458 (*options)->max_edit_distance = INT4_MAX;
1459
1460 return 0;
1461
1462 }
1463
1464 Int2
BLAST_FillHitSavingOptions(BlastHitSavingOptions * options,double evalue,Int4 hitlist_size,Boolean is_gapped,Int4 culling_limit,Int4 min_diag_separation)1465 BLAST_FillHitSavingOptions(BlastHitSavingOptions* options,
1466 double evalue, Int4 hitlist_size,
1467 Boolean is_gapped, Int4 culling_limit,
1468 Int4 min_diag_separation)
1469 {
1470 if (!options)
1471 return BLASTERR_INVALIDPARAM;
1472
1473 if (hitlist_size)
1474 options->hitlist_size = hitlist_size;
1475 if (evalue)
1476 options->expect_value = evalue;
1477 if (min_diag_separation)
1478 options->min_diag_separation = min_diag_separation;
1479 options->culling_limit = culling_limit;
1480 options->hsp_filt_opt = NULL;
1481 options->max_edit_distance = INT4_MAX;
1482
1483 return 0;
1484
1485 }
1486
1487 Int2
BlastHitSavingOptionsValidate(EBlastProgramType program_number,const BlastHitSavingOptions * options,Blast_Message ** blast_msg)1488 BlastHitSavingOptionsValidate(EBlastProgramType program_number,
1489 const BlastHitSavingOptions* options, Blast_Message* *blast_msg)
1490 {
1491 if (options == NULL)
1492 return BLASTERR_INVALIDPARAM;
1493
1494 if (options->hitlist_size < 1)
1495 {
1496 Blast_MessageWrite(blast_msg, eBlastSevError, kBlastMessageNoContext,
1497 "No hits are being saved");
1498 return BLASTERR_OPTION_VALUE_INVALID;
1499 }
1500
1501 if (options->expect_value <= 0.0 && options->cutoff_score <= 0)
1502 {
1503 Blast_MessageWrite(blast_msg, eBlastSevError, kBlastMessageNoContext,
1504 "expect value or cutoff score must be greater than zero");
1505 return BLASTERR_OPTION_VALUE_INVALID;
1506 }
1507
1508 if (options->longest_intron != 0 &&
1509 program_number != eBlastTypeTblastn &&
1510 program_number != eBlastTypePsiTblastn &&
1511 program_number != eBlastTypeBlastx &&
1512 program_number != eBlastTypeMapping) {
1513 Blast_MessageWrite(blast_msg, eBlastSevError, kBlastMessageNoContext,
1514 "Uneven gap linking of HSPs is allowed for blastx, "
1515 "tblastn, and psitblastn only");
1516 return BLASTERR_OPTION_PROGRAM_INVALID;
1517 }
1518
1519 if (options->culling_limit < 0)
1520 {
1521 Blast_MessageWrite(blast_msg, eBlastSevError, kBlastMessageNoContext,
1522 "culling limit must be greater than or equal to zero");
1523 return BLASTERR_OPTION_VALUE_INVALID;
1524 }
1525
1526 if (options->hsp_filt_opt) {
1527 if (BlastHSPFilteringOptionsValidate(options->hsp_filt_opt) != 0) {
1528 Blast_MessageWrite(blast_msg, eBlastSevError, kBlastMessageNoContext,
1529 "HSP Filtering options invalid");
1530 return BLASTERR_OPTION_VALUE_INVALID;
1531 }
1532 }
1533
1534 return 0;
1535 }
1536
PSIBlastOptionsNew(PSIBlastOptions ** psi_options)1537 Int2 PSIBlastOptionsNew(PSIBlastOptions** psi_options)
1538 {
1539 PSIBlastOptions* options = NULL;
1540
1541 if ( !psi_options )
1542 return BLASTERR_INVALIDPARAM;
1543
1544 options = (PSIBlastOptions*)calloc(1, sizeof(PSIBlastOptions));
1545 if ( !options )
1546 return BLASTERR_MEMORY;
1547
1548 *psi_options = options;
1549 options->inclusion_ethresh = PSI_INCLUSION_ETHRESH;
1550 options->pseudo_count = PSI_PSEUDO_COUNT_CONST;
1551 options->use_best_alignment = TRUE;
1552
1553 options->nsg_compatibility_mode = FALSE;
1554 options->impala_scaling_factor = kPSSM_NoImpalaScaling;
1555 options->ignore_unaligned_positions = FALSE;
1556
1557 return 0;
1558 }
1559
PSIBlastOptionsValidate(const PSIBlastOptions * psi_options,Blast_Message ** blast_msg)1560 Int2 PSIBlastOptionsValidate(const PSIBlastOptions* psi_options,
1561 Blast_Message** blast_msg)
1562 {
1563 Int2 retval = 1; /* assume failure */
1564
1565 if ( !psi_options ) {
1566 return retval;
1567 }
1568
1569 if (psi_options->pseudo_count < 0) {
1570 Blast_MessageWrite(blast_msg, eBlastSevError, kBlastMessageNoContext,
1571 "Pseudo count must be greater than or equal to 0");
1572 return retval;
1573 }
1574
1575 if (psi_options->inclusion_ethresh <= 0.0) {
1576 Blast_MessageWrite(blast_msg, eBlastSevError, kBlastMessageNoContext,
1577 "Inclusion threshold must be greater than 0");
1578 return retval;
1579 }
1580
1581 retval = 0;
1582 return retval;
1583 }
1584
/** Releases PSI-BLAST options.
 * @param psi_options Structure to release; NULL is accepted [in]
 * @return Always NULL.
 */
PSIBlastOptions* PSIBlastOptionsFree(PSIBlastOptions* psi_options)
{
    if (psi_options != NULL)
        sfree(psi_options);

    return NULL;
}
1590
BlastDatabaseOptionsNew(BlastDatabaseOptions ** db_options)1591 Int2 BlastDatabaseOptionsNew(BlastDatabaseOptions** db_options)
1592 {
1593 BlastDatabaseOptions* options = NULL;
1594
1595 if ( !db_options ) {
1596 return BLASTERR_INVALIDPARAM;
1597 }
1598
1599 options = (BlastDatabaseOptions*) calloc(1, sizeof(BlastDatabaseOptions));
1600 if ( !options ) {
1601 return BLASTERR_MEMORY;
1602 }
1603
1604 options->genetic_code = BLAST_GENETIC_CODE;
1605 *db_options = options;
1606
1607 return 0;
1608 }
1609
1610 BlastDatabaseOptions*
BlastDatabaseOptionsFree(BlastDatabaseOptions * db_options)1611 BlastDatabaseOptionsFree(BlastDatabaseOptions* db_options)
1612 {
1613
1614 if (db_options == NULL)
1615 return NULL;
1616
1617 sfree(db_options);
1618 return NULL;
1619 }
1620
BLAST_InitDefaultOptions(EBlastProgramType program_number,LookupTableOptions ** lookup_options,QuerySetUpOptions ** query_setup_options,BlastInitialWordOptions ** word_options,BlastExtensionOptions ** ext_options,BlastHitSavingOptions ** hit_options,BlastScoringOptions ** score_options,BlastEffectiveLengthsOptions ** eff_len_options,PSIBlastOptions ** psi_options,BlastDatabaseOptions ** db_options)1621 Int2 BLAST_InitDefaultOptions(EBlastProgramType program_number,
1622 LookupTableOptions** lookup_options,
1623 QuerySetUpOptions** query_setup_options,
1624 BlastInitialWordOptions** word_options,
1625 BlastExtensionOptions** ext_options,
1626 BlastHitSavingOptions** hit_options,
1627 BlastScoringOptions** score_options,
1628 BlastEffectiveLengthsOptions** eff_len_options,
1629 PSIBlastOptions** psi_options,
1630 BlastDatabaseOptions** db_options)
1631 {
1632 Int2 status;
1633
1634 if ((status = LookupTableOptionsNew(program_number, lookup_options)))
1635 return status;
1636
1637 if ((status=BlastQuerySetUpOptionsNew(query_setup_options)))
1638 return status;
1639
1640 if ((status=BlastInitialWordOptionsNew(program_number, word_options)))
1641 return status;
1642
1643 if ((status=BlastScoringOptionsNew(program_number, score_options)))
1644 return status;
1645
1646 if ((status = BlastExtensionOptionsNew(program_number, ext_options,
1647 (*score_options)->gapped_calculation)))
1648 return status;
1649
1650 if ((status=BlastHitSavingOptionsNew(program_number, hit_options,
1651 (*score_options)->gapped_calculation)))
1652 return status;
1653
1654 if ((status=BlastEffectiveLengthsOptionsNew(eff_len_options)))
1655 return status;
1656
1657 if ((status=PSIBlastOptionsNew(psi_options)))
1658 return status;
1659
1660 if ((status=BlastDatabaseOptionsNew(db_options)))
1661 return status;
1662
1663 return 0;
1664
1665 }
1666
1667 /** Checks that the extension and scoring options are consistent with each other
1668 * @param program_number identifies the program [in]
1669 * @param ext_options the extension options [in]
1670 * @param score_options the scoring options [in]
1671 * @param blast_msg returns a message on errors. [in|out]
1672 * @return zero on success, an error code otherwise.
1673 */
s_BlastExtensionScoringOptionsValidate(EBlastProgramType program_number,const BlastExtensionOptions * ext_options,const BlastScoringOptions * score_options,Blast_Message ** blast_msg)1674 static Int2 s_BlastExtensionScoringOptionsValidate(EBlastProgramType program_number,
1675 const BlastExtensionOptions* ext_options,
1676 const BlastScoringOptions* score_options,
1677 Blast_Message* *blast_msg)
1678 {
1679 if (ext_options == NULL || score_options == NULL)
1680 return BLASTERR_INVALIDPARAM;
1681
1682 if (program_number == eBlastTypeBlastn)
1683 {
1684 if (score_options->gap_open == 0 && score_options->gap_extend == 0)
1685 {
1686 if (ext_options->ePrelimGapExt != eGreedyScoreOnly &&
1687 ext_options->eTbackExt != eGreedyTbck)
1688 {
1689 Blast_MessageWrite(blast_msg, eBlastSevWarning,
1690 kBlastMessageNoContext,
1691 "Greedy extension must be used if gap existence and extension options are zero");
1692 return BLASTERR_OPTION_VALUE_INVALID;
1693 }
1694 }
1695 }
1696
1697 if (program_number == eBlastTypeMapping) {
1698 if (ext_options->ePrelimGapExt != eJumperWithTraceback) {
1699
1700 Blast_MessageWrite(blast_msg, eBlastSevWarning,
1701 kBlastMessageNoContext,
1702 "Jumper extension must be used for mapping");
1703
1704 return BLASTERR_OPTION_VALUE_INVALID;
1705 }
1706 }
1707
1708 if (ext_options->compositionBasedStats != eNoCompositionBasedStats)
1709 {
1710 if (!Blast_QueryIsPssm(program_number) && program_number != eBlastTypeTblastn &&
1711 program_number != eBlastTypeBlastp &&
1712 program_number != eBlastTypeBlastx &&
1713 program_number != eBlastTypeRpsBlast &&
1714 program_number != eBlastTypeRpsTblastn &&
1715 program_number != eBlastTypePsiBlast) {
1716 Blast_MessageWrite(blast_msg, eBlastSevWarning, kBlastMessageNoContext,
1717 "Compositional adjustments are only supported with blastp, blastx, or tblastn");
1718 return BLASTERR_OPTION_VALUE_INVALID;
1719 }
1720 if (!score_options->gapped_calculation) {
1721 Blast_MessageWrite(blast_msg, eBlastSevWarning, kBlastMessageNoContext,
1722 "Compositional adjustments are only supported for gapped searches");
1723 return BLASTERR_OPTION_VALUE_INVALID;
1724 }
1725
1726 }
1727
1728 return 0;
1729 }
1730
1731
BLAST_ValidateOptions(EBlastProgramType program_number,const BlastExtensionOptions * ext_options,const BlastScoringOptions * score_options,const LookupTableOptions * lookup_options,const BlastInitialWordOptions * word_options,const BlastHitSavingOptions * hit_options,Blast_Message ** blast_msg)1732 Int2 BLAST_ValidateOptions(EBlastProgramType program_number,
1733 const BlastExtensionOptions* ext_options,
1734 const BlastScoringOptions* score_options,
1735 const LookupTableOptions* lookup_options,
1736 const BlastInitialWordOptions* word_options,
1737 const BlastHitSavingOptions* hit_options,
1738 Blast_Message* *blast_msg)
1739 {
1740 Int2 status = 0;
1741
1742 if ((status = BlastExtensionOptionsValidate(program_number, ext_options,
1743 blast_msg)) != 0)
1744 return status;
1745 if ((status = BlastScoringOptionsValidate(program_number, score_options,
1746 blast_msg)) != 0)
1747 return status;
1748 if ((status = LookupTableOptionsValidate(program_number,
1749 lookup_options, blast_msg)) != 0)
1750 return status;
1751 if ((status = BlastInitialWordOptionsValidate(program_number,
1752 word_options, blast_msg)) != 0)
1753 return status;
1754 if ((status = BlastHitSavingOptionsValidate(program_number, hit_options,
1755 blast_msg)) != 0)
1756 return status;
1757 if ((status = s_BlastExtensionScoringOptionsValidate(program_number, ext_options,
1758 score_options, blast_msg)) != 0)
1759 return status;
1760
1761 /* Word sizes larger than 5 are not suported for IDENTITY scoring matrix.
1762 Identity matrix is only supported for blastp and tblastn. */
1763 if (program_number == eBlastTypeBlastp ||
1764 program_number == eBlastTypeTblastn) {
1765
1766 char* matrix = BLAST_StrToUpper(score_options->matrix);
1767 Boolean is_identity = strcmp(matrix, "IDENTITY") == 0;
1768
1769 if (matrix) {
1770 free(matrix);
1771 }
1772
1773 if (lookup_options->word_size > 5 && is_identity) {
1774
1775 Blast_MessageWrite(blast_msg, eBlastSevError,
1776 kBlastMessageNoContext,
1777 "Word size larger than 5 is not supported for "
1778 "the identity scoring matrix");
1779
1780 return BLASTERR_OPTION_VALUE_INVALID;
1781 }
1782 }
1783
1784 return status;
1785 }
1786
BlastHSPBestHitOptionsNew(double overhang,double score_edge)1787 BlastHSPBestHitOptions* BlastHSPBestHitOptionsNew(double overhang, double score_edge)
1788 {
1789 BlastHSPBestHitOptions* retval =
1790 (BlastHSPBestHitOptions*) calloc(1, sizeof(BlastHSPBestHitOptions));
1791 retval->overhang = overhang;
1792 retval->score_edge = score_edge;
1793 return retval;
1794 }
1795
1796 Int2
BlastHSPBestHitOptionsValidate(const BlastHSPFilteringOptions * opts)1797 BlastHSPBestHitOptionsValidate(const BlastHSPFilteringOptions* opts)
1798 {
1799 Int2 retval = 0; /* assume success */
1800 BlastHSPBestHitOptions* best_hit = opts->best_hit;
1801
1802 if ( !best_hit ) {
1803 return retval;
1804 }
1805
1806 if (best_hit->overhang <= kBestHit_OverhangMin ||
1807 best_hit->overhang >= kBestHit_OverhangMax) {
1808 return -1;
1809 }
1810
1811 if (best_hit->score_edge <= kBestHit_ScoreEdgeMin ||
1812 best_hit->score_edge >= kBestHit_ScoreEdgeMax) {
1813 return -1;
1814 }
1815
1816 return retval;
1817 }
1818
/** Releases best hit filtering options.
 * @param opt Structure to release; NULL is accepted [in]
 * @return Always NULL.
 */
BlastHSPBestHitOptions* BlastHSPBestHitOptionsFree(BlastHSPBestHitOptions* opt)
{
    if (opt != NULL)
        sfree(opt);

    return NULL;
}
1827
BlastHSPCullingOptionsNew(int max)1828 BlastHSPCullingOptions* BlastHSPCullingOptionsNew(int max)
1829 {
1830 BlastHSPCullingOptions* retval =
1831 (BlastHSPCullingOptions*) calloc(1, sizeof(BlastHSPCullingOptions));
1832 retval->max_hits = max;
1833 return retval;
1834 }
1835
1836 Int2
BlastHSPCullingOptionsValidate(const BlastHSPFilteringOptions * opts)1837 BlastHSPCullingOptionsValidate(const BlastHSPFilteringOptions* opts)
1838 {
1839 Int2 retval = 0;
1840 BlastHSPCullingOptions* culling_opts = opts->culling_opts;
1841 if (!culling_opts)
1842 return retval;
1843
1844 if (culling_opts->max_hits < 0)
1845 return -1;
1846
1847 return retval;
1848 }
1849
1850 BlastHSPCullingOptions*
BlastHSPCullingOptionsFree(BlastHSPCullingOptions * culling_opts)1851 BlastHSPCullingOptionsFree(BlastHSPCullingOptions* culling_opts)
1852 {
1853 if (!culling_opts)
1854 return NULL;
1855
1856 sfree(culling_opts);
1857 return NULL;
1858 }
1859
1860
BlastHSPFilteringOptionsNew()1861 BlastHSPFilteringOptions* BlastHSPFilteringOptionsNew()
1862 {
1863 return (BlastHSPFilteringOptions*)calloc(1,
1864 sizeof(BlastHSPFilteringOptions));
1865 }
1866
1867 Int2
BlastHSPFilteringOptions_AddBestHit(BlastHSPFilteringOptions * filt_opts,BlastHSPBestHitOptions ** best_hit,EBlastStage stage)1868 BlastHSPFilteringOptions_AddBestHit(BlastHSPFilteringOptions* filt_opts,
1869 BlastHSPBestHitOptions** best_hit,
1870 EBlastStage stage)
1871 {
1872 if ( filt_opts == NULL || best_hit == NULL || *best_hit == NULL) {
1873 return 1;
1874 }
1875
1876 filt_opts->best_hit = *best_hit;
1877 *best_hit = NULL;
1878 filt_opts->best_hit_stage = stage;
1879
1880 return 0;
1881 }
1882
1883 Int2
BlastHSPFilteringOptions_AddCulling(BlastHSPFilteringOptions * filt_opts,BlastHSPCullingOptions ** culling,EBlastStage stage)1884 BlastHSPFilteringOptions_AddCulling(BlastHSPFilteringOptions* filt_opts,
1885 BlastHSPCullingOptions** culling,
1886 EBlastStage stage)
1887 {
1888 if ( filt_opts == NULL || culling == NULL || *culling == NULL) {
1889 return 1;
1890 }
1891
1892 filt_opts->culling_opts = *culling;
1893 *culling = NULL;
1894 filt_opts->culling_stage = stage;
1895
1896 return 0;
1897 }
1898
1899 Int2
BlastHSPFilteringOptionsValidate(const BlastHSPFilteringOptions * opts)1900 BlastHSPFilteringOptionsValidate(const BlastHSPFilteringOptions* opts)
1901 {
1902 Int2 retval = 0; /* assume success */
1903 Boolean writer_found = FALSE;
1904
1905 if ( (retval = BlastHSPBestHitOptionsValidate(opts)) != 0) {
1906 return retval;
1907 }
1908 if (opts->best_hit_stage & ePrelimSearch) {
1909 writer_found = TRUE;
1910 }
1911
1912 if ( (retval = BlastHSPCullingOptionsValidate(opts)) != 0) {
1913 return retval;
1914 }
1915 if ((opts->culling_stage & ePrelimSearch) && writer_found) {
1916 return 1;
1917 }
1918
1919 return retval;
1920 }
1921
1922 BlastHSPFilteringOptions*
BlastHSPFilteringOptionsFree(BlastHSPFilteringOptions * opts)1923 BlastHSPFilteringOptionsFree(BlastHSPFilteringOptions* opts)
1924 {
1925 if ( !opts ) {
1926 return NULL;
1927 }
1928 opts->best_hit = BlastHSPBestHitOptionsFree(opts->best_hit);
1929 opts->culling_opts = BlastHSPCullingOptionsFree(opts->culling_opts);
1930 opts->subject_besthit_opts = BlastHSPSubjectBestHitOptionsFree(opts->subject_besthit_opts);
1931 sfree(opts);
1932 return opts;
1933 }
1934
1935 BlastHSPSubjectBestHitOptions*
BlastHSPSubjectBestHitOptionsNew(Boolean isProtein)1936 BlastHSPSubjectBestHitOptionsNew(Boolean isProtein)
1937 {
1938 BlastHSPSubjectBestHitOptions* retval =
1939 (BlastHSPSubjectBestHitOptions*) calloc(1, sizeof(BlastHSPSubjectBestHitOptions));
1940 if(isProtein){
1941 retval->max_range_diff = DEFAULT_SUBJECT_BESTHIT_PROT_MAX_RANGE_DIFF;
1942 }
1943 else {
1944 retval->max_range_diff = DEFAULT_SUBJECT_BESTHIT_NUCL_MAX_RANGE_DIFF;
1945 }
1946 return retval;
1947 }
1948
1949 Int2
BlastHSPSubjectBestHitOptionsValidate(const BlastHSPFilteringOptions * opts)1950 BlastHSPSubjectBestHitOptionsValidate(const BlastHSPFilteringOptions* opts)
1951 {
1952 Int2 retval = 0;
1953 BlastHSPSubjectBestHitOptions* besthit_opts = opts->subject_besthit_opts;
1954 if (!besthit_opts)
1955 return retval;
1956
1957 return retval;
1958 }
1959
1960 BlastHSPSubjectBestHitOptions*
BlastHSPSubjectBestHitOptionsFree(BlastHSPSubjectBestHitOptions * subject_besthit_opts)1961 BlastHSPSubjectBestHitOptionsFree(BlastHSPSubjectBestHitOptions* subject_besthit_opts)
1962 {
1963 if (!subject_besthit_opts)
1964 return NULL;
1965
1966 sfree(subject_besthit_opts);
1967 return NULL;
1968 }
1969
1970 Int2
BlastHSPFilteringOptions_AddSubjectBestHit(BlastHSPFilteringOptions * filt_opts,BlastHSPSubjectBestHitOptions ** subject_besthit)1971 BlastHSPFilteringOptions_AddSubjectBestHit(BlastHSPFilteringOptions* filt_opts,
1972 BlastHSPSubjectBestHitOptions** subject_besthit)
1973 {
1974 if ( filt_opts == NULL || subject_besthit == NULL || *subject_besthit == NULL) {
1975 return 1;
1976 }
1977
1978 filt_opts->subject_besthit_opts = *subject_besthit;
1979 *subject_besthit = NULL;
1980
1981 return 0;
1982 }
1983
1984
1985
1986