1 /* $Id: blast_options.c 567495 2018-07-19 13:19:39Z fongah2 $
2  * ===========================================================================
3  *
4  *                            PUBLIC DOMAIN NOTICE
5  *               National Center for Biotechnology Information
6  *
7  *  This software/database is a "United States Government Work" under the
8  *  terms of the United States Copyright Act.  It was written as part of
9  *  the author's official duties as a United States Government employee and
10  *  thus cannot be copyrighted.  This software/database is freely available
11  *  to the public for use. The National Library of Medicine and the U.S.
12  *  Government have not placed any restriction on its use or reproduction.
13  *
14  *  Although all reasonable efforts have been taken to ensure the accuracy
15  *  and reliability of the software and data, the NLM and the U.S.
16  *  Government do not and cannot warrant the performance or results that
17  *  may be obtained by using this software or data. The NLM and the U.S.
18  *  Government disclaim all warranties, express or implied, including
19  *  warranties of performance, merchantability or fitness for any particular
20  *  purpose.
21  *
22  *  Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  */
26 
27 /** @file blast_options.c
28  *  The structures and functions in blast_options.[ch] should be used to specify
29  *  user preferences.  The options structures should not be changed by the BLAST code
30  *  but rather be read to determine user preferences.  When possible these structures
31  *  should be passed in as "const".
32  *
33  */
34 
35 #include <algo/blast/core/blast_options.h>
36 #include <algo/blast/core/blast_filter.h>
37 #include <algo/blast/core/blast_stat.h>
38 #include <algo/blast/composition_adjustment/composition_constants.h>
39 #include <algo/blast/core/hspfilter_collector.h>
40 #include <algo/blast/core/hspfilter_besthit.h>
41 #include <algo/blast/core/blast_util.h>
42 
/** PSSM scaling factor constant.
 *  NOTE(review): the name suggests "no IMPALA-style scaling"; confirm with
 *  the code that consumes it. */
const double kPSSM_NoImpalaScaling = 1.0;

/* Default DUST parameters; SDustOptionsNew() copies them into every newly
 * created SDustOptions.  Declared in blast_def.h as extern const. */

/** Default value for SDustOptions::level. */
const int kDustLevel = 20;
/** Default value for SDustOptions::window. */
const int kDustWindow = 64;
/** Default value for SDustOptions::linker. */
const int kDustLinker = 1;
49 
/** Releases a DUST options structure.
 * @param dust_options structure to release; NULL is accepted [in]
 * @return always NULL
 */
SDustOptions* SDustOptionsFree(SDustOptions* dust_options)
{
    if (dust_options == NULL) {
        return NULL;
    }

    sfree(dust_options);
    return NULL;
}
56 
SDustOptionsNew(SDustOptions ** dust_options)57 Int2 SDustOptionsNew(SDustOptions* *dust_options)
58 {
59     if (dust_options == NULL)
60         return 1;
61 
62     *dust_options = (SDustOptions*) malloc(sizeof(SDustOptions));
63     (*dust_options)->level = kDustLevel;
64     (*dust_options)->window = kDustWindow;
65     (*dust_options)->linker = kDustLinker;
66 
67     return 0;
68 }
69 
/** Releases a SEG options structure.
 * @param seg_options structure to release; NULL is accepted [in]
 * @return always NULL
 */
SSegOptions* SSegOptionsFree(SSegOptions* seg_options)
{
    if (seg_options == NULL) {
        return NULL;
    }

    sfree(seg_options);
    return NULL;
}
76 
SSegOptionsNew(SSegOptions ** seg_options)77 Int2 SSegOptionsNew(SSegOptions* *seg_options)
78 {
79     if (seg_options == NULL)
80         return 1;
81 
82     *seg_options = (SSegOptions*) malloc(sizeof(SSegOptions));
83     (*seg_options)->window = kSegWindow;
84     (*seg_options)->locut = kSegLocut;
85     (*seg_options)->hicut = kSegHicut;
86 
87     return 0;
88 }
89 
SWindowMaskerOptionsNew(SWindowMaskerOptions ** winmask_options)90 Int2 SWindowMaskerOptionsNew(SWindowMaskerOptions ** winmask_options)
91 {
92     if (winmask_options) {
93         *winmask_options = (SWindowMaskerOptions*) calloc(1, sizeof(SWindowMaskerOptions));
94         if (*winmask_options == NULL)
95             return BLASTERR_MEMORY;
96 
97         (*winmask_options)->taxid = 0;
98         (*winmask_options)->database = NULL;
99         return 0;
100     }
101     return 1;
102 }
103 
/** Releases a window masker options structure together with its
 *  database name, if one is set.
 * @param winmask_options structure to release; NULL is accepted [in]
 * @return always NULL
 */
SWindowMaskerOptions* SWindowMaskerOptionsFree(SWindowMaskerOptions* winmask_options)
{
    if (winmask_options == NULL)
        return NULL;

    if (winmask_options->database != NULL)
        sfree(winmask_options->database);

    sfree(winmask_options);
    return NULL;
}
116 
/** Releases a repeat filter options structure together with its
 *  database name.
 * @param repeat_options structure to release; NULL is accepted [in]
 * @return always NULL
 */
SRepeatFilterOptions* SRepeatFilterOptionsFree(SRepeatFilterOptions* repeat_options)
{
    if (repeat_options == NULL)
        return NULL;

    sfree(repeat_options->database);
    sfree(repeat_options);
    return NULL;
}
126 
SRepeatFilterOptionsNew(SRepeatFilterOptions ** repeat_options)127 Int2 SRepeatFilterOptionsNew(SRepeatFilterOptions* *repeat_options)
128 {
129 
130     if (repeat_options == NULL)
131         return 1;
132 
133     *repeat_options = (SRepeatFilterOptions*) calloc(1, sizeof(SRepeatFilterOptions));
134     if (*repeat_options == NULL)
135         return BLASTERR_MEMORY;
136 
137     (*repeat_options)->database = strdup(kDefaultRepeatFilterDb);
138 
139     return 0;
140 }
141 
SRepeatFilterOptionsResetDB(SRepeatFilterOptions ** repeat_options,const char * db)142 Int2 SRepeatFilterOptionsResetDB(SRepeatFilterOptions* *repeat_options, const char* db)
143 {
144     Int2 status=0;
145 
146     if (*repeat_options == NULL)
147       status = SRepeatFilterOptionsNew(repeat_options);
148 
149     if (status)
150       return status;
151 
152     sfree((*repeat_options)->database);
153     (*repeat_options)->database = strdup(db);
154 
155     return status;
156 }
157 
SWindowMaskerOptionsResetDB(SWindowMaskerOptions ** winmask_options,const char * db)158 Int2 SWindowMaskerOptionsResetDB(SWindowMaskerOptions ** winmask_options, const char* db)
159 {
160     Int2 status=0;
161 
162     if (*winmask_options == NULL)
163         status = SWindowMaskerOptionsNew(winmask_options);
164 
165     if (status)
166         return status;
167 
168     sfree((*winmask_options)->database);
169 
170     if (db) {
171         (*winmask_options)->database = strdup(db);
172     }
173 
174     return status;
175 }
176 
/** Releases a read quality options structure.
 * @param read_quality_options structure to release; NULL is accepted [in]
 * @return always NULL
 */
SReadQualityOptions* SReadQualityOptionsFree(
                                    SReadQualityOptions* read_quality_options)
{
    /* free() ignores NULL, so no explicit guard is needed. */
    free(read_quality_options);
    return NULL;
}
186 
SReadQualityOptionsNew(SReadQualityOptions ** read_quality_options)187 Int2 SReadQualityOptionsNew(SReadQualityOptions** read_quality_options)
188 {
189     if (!read_quality_options) {
190         return 1;
191     }
192 
193     *read_quality_options = calloc(1, sizeof(SReadQualityOptions));
194     if (!*read_quality_options) {
195         return 1;
196     }
197 
198     (*read_quality_options)->frac_ambig = 0.5;
199     (*read_quality_options)->entropy = 16;
200 
201     return 0;
202 }
203 
/** Releases a filter options structure and every sub-option structure it
 *  holds (dust, SEG, repeat filter, window masker, read quality).
 * @param filter_options structure to release; NULL is accepted [in]
 * @return always NULL
 */
SBlastFilterOptions* SBlastFilterOptionsFree(SBlastFilterOptions* filter_options)
{
    if (filter_options == NULL)
        return NULL;

    filter_options->dustOptions = SDustOptionsFree(filter_options->dustOptions);
    filter_options->segOptions = SSegOptionsFree(filter_options->segOptions);
    filter_options->repeatFilterOptions =
        SRepeatFilterOptionsFree(filter_options->repeatFilterOptions);
    filter_options->windowMaskerOptions =
        SWindowMaskerOptionsFree(filter_options->windowMaskerOptions);
    filter_options->readQualityOptions =
        SReadQualityOptionsFree(filter_options->readQualityOptions);

    sfree(filter_options);
    return NULL;
}
223 
SBlastFilterOptionsNew(SBlastFilterOptions ** filter_options,EFilterOptions type)224 Int2 SBlastFilterOptionsNew(SBlastFilterOptions* *filter_options,  EFilterOptions type)
225 {
226     Int2 status = 0;
227 
228     if (filter_options)
229     {
230         *filter_options = (SBlastFilterOptions*) calloc(1, sizeof(SBlastFilterOptions));
231         (*filter_options)->mask_at_hash = FALSE;
232         if (type == eSeg)
233           SSegOptionsNew(&((*filter_options)->segOptions));
234         if (type == eDust || type == eDustRepeats)
235           SDustOptionsNew(&((*filter_options)->dustOptions));
236         if (type == eRepeats || type == eDustRepeats)
237           SRepeatFilterOptionsNew(&((*filter_options)->repeatFilterOptions));
238     }
239     else
240         status = 1;
241 
242     return status;
243 }
244 
245 
246 /** Merges together two sets of dust options, choosing the most non-default one.
247  *
248  * @param opt1 first set to be merged [in]
249  * @param opt2 second set to be merged [in]
250  * @return the merged options.
251  */
s_MergeDustOptions(const SDustOptions * opt1,const SDustOptions * opt2)252 static SDustOptions* s_MergeDustOptions(const SDustOptions* opt1, const SDustOptions* opt2)
253 {
254      SDustOptions* retval = NULL;
255 
256      if (!opt1 && !opt2)
257          return NULL;
258 
259      SDustOptionsNew(&retval);
260 
261      if (opt1 && !opt2)
262      {
263            retval->level = opt1->level;
264            retval->window = opt1->window;
265            retval->linker = opt1->linker;
266      }
267      else if (!opt1 && opt2)
268      {
269            retval->level = opt2->level;
270            retval->window = opt2->window;
271            retval->linker = opt2->linker;
272      }
273      else
274      {
275           retval->level = (opt1->level != kDustLevel) ? opt1->level : opt2->level;
276           retval->window = (opt1->window != kDustWindow) ? opt1->window : opt2->window;
277           retval->linker = (opt1->linker != kDustLinker) ? opt1->linker : opt2->linker;
278      }
279 
280      return retval;
281 }
282 
283 
284 /** Merges together two sets of SEG options, choosing the most non-default one.
285  *
286  * @param opt1 first set to be merged [in]
287  * @param opt2 second set to be merged [in]
288  * @return the merged options.
289  */
s_MergeSegOptions(const SSegOptions * opt1,const SSegOptions * opt2)290 static SSegOptions* s_MergeSegOptions(const SSegOptions* opt1, const SSegOptions* opt2)
291 {
292     SSegOptions* retval = NULL;
293 
294     if (!opt1 && !opt2)
295         return NULL;
296 
297     SSegOptionsNew(&retval);
298 
299     if (opt1 && !opt2)
300     {
301          retval->window = opt1->window;
302          retval->locut = opt1->locut;
303          retval->hicut = opt1->hicut;
304     }
305     else if (!opt1 && opt2)
306     {
307          retval->window = opt2->window;
308          retval->locut = opt2->locut;
309          retval->hicut = opt2->hicut;
310     }
311     else
312     {
313          retval->window = (opt1->window != kSegWindow) ? opt1->window : opt2->window;
314          retval->locut = (opt1->locut != kSegLocut) ? opt1->locut : opt2->locut;
315          retval->hicut = (opt1->hicut != kSegHicut) ? opt1->hicut : opt2->hicut;
316     }
317     return retval;
318 }
319 
320 /** Merges together two sets of repeat filter options, choosing the most non-default one.
321  *
322  * @param opt1 first set to be merged [in]
323  * @param opt2 second set to be merged [in]
324  * @return the merged options.
325  */
s_MergeRepeatOptions(const SRepeatFilterOptions * opt1,const SRepeatFilterOptions * opt2)326 static SRepeatFilterOptions* s_MergeRepeatOptions(const SRepeatFilterOptions* opt1, const SRepeatFilterOptions* opt2)
327 {
328       SRepeatFilterOptions* retval = NULL;
329 
330       if (!opt1 && !opt2)
331          return NULL;
332 
333       SRepeatFilterOptionsNew(&retval);
334 
335       if (opt1 && !opt2)
336       {
337            SRepeatFilterOptionsResetDB(&retval, opt1->database);
338       }
339       else if (!opt1 && opt2)
340       {
341            SRepeatFilterOptionsResetDB(&retval, opt2->database);
342       }
343       else
344       {  /* TODO : handle different db's. */
345            SRepeatFilterOptionsResetDB(&retval, opt2->database);
346       }
347       return retval;
348 }
349 
350 /** Merges together two sets of window masker options, choosing the most non-default one.
351  *
352  * @param opt1 first set to be merged [in]
353  * @param opt2 second set to be merged [in]
354  * @return the merged options.
355  */
356 static SWindowMaskerOptions*
s_MergeWindowMaskerOptions(const SWindowMaskerOptions * opt1,const SWindowMaskerOptions * opt2)357 s_MergeWindowMaskerOptions(const SWindowMaskerOptions* opt1,
358                            const SWindowMaskerOptions* opt2)
359 {
360     SWindowMaskerOptions* retval = NULL;
361     const SWindowMaskerOptions* src = NULL;
362     Boolean have1 = FALSE, have2 = FALSE;
363 
364     have1 = opt1 && (opt1->database || opt1->taxid);
365     have2 = opt2 && (opt2->database || opt2->taxid);
366 
367     if (! (have1 || have2))
368         return NULL;
369 
370     if (have1 && ! have2) {
371         src = opt1;
372     } else if (! have1 && have2) {
373         src = opt2;
374     } else {
375         // We have data structures with some kind of content, so
376         // prefer structure 2 as repeat filter options do.
377         src = opt2;
378     }
379 
380     ASSERT(src);
381     ASSERT(src->database || src->taxid);
382 
383     SWindowMaskerOptionsNew(&retval);
384     SWindowMaskerOptionsResetDB(& retval, src->database);
385     retval->taxid = src->taxid;
386 
387     return retval;
388 }
389 
SBlastFilterOptionsMerge(SBlastFilterOptions ** combined,const SBlastFilterOptions * opt1,const SBlastFilterOptions * opt2)390 Int2 SBlastFilterOptionsMerge(SBlastFilterOptions** combined, const SBlastFilterOptions* opt1,
391        const SBlastFilterOptions* opt2)
392 {
393      SBlastFilterOptions* retval = NULL;
394      Int2 status = 0;
395 
396      *combined = NULL;
397 
398      if (opt1 == NULL && opt2 == NULL)
399          return 0;
400 
401      status = SBlastFilterOptionsNew(&retval, eEmpty);
402      if (status != 0)
403          return status;
404 
405      *combined = retval;
406 
407      if ((opt1 && opt1->mask_at_hash) || (opt2 && opt2->mask_at_hash))
408          retval->mask_at_hash = TRUE;
409 
410      retval->dustOptions =
411          s_MergeDustOptions(opt1 ? opt1->dustOptions : NULL, opt2 ? opt2->dustOptions : NULL);
412      retval->segOptions =
413          s_MergeSegOptions(opt1 ? opt1->segOptions : NULL, opt2 ? opt2->segOptions : NULL);
414      retval->repeatFilterOptions =
415          s_MergeRepeatOptions(opt1 ? opt1->repeatFilterOptions : NULL, opt2 ? opt2->repeatFilterOptions : NULL);
416      retval->windowMaskerOptions =
417          s_MergeWindowMaskerOptions(opt1 ? opt1->windowMaskerOptions : NULL, opt2 ? opt2->windowMaskerOptions : NULL);
418 
419      return 0;
420 }
421 
SBlastFilterOptionsNoFiltering(const SBlastFilterOptions * filter_options)422 Boolean SBlastFilterOptionsNoFiltering(const SBlastFilterOptions* filter_options)
423 {
424        if (filter_options == NULL)
425           return TRUE;
426 
427        return filter_options->dustOptions == NULL &&
428            filter_options->segOptions == NULL &&
429            filter_options->repeatFilterOptions == NULL &&
430            filter_options->windowMaskerOptions == NULL;
431 }
432 
SBlastFilterOptionsMaskAtHash(const SBlastFilterOptions * filter_options)433 Boolean SBlastFilterOptionsMaskAtHash(const SBlastFilterOptions* filter_options)
434 {
435        if (filter_options == NULL)
436           return FALSE;
437 
438        return filter_options->mask_at_hash;
439 }
440 
SBlastFilterOptionsValidate(EBlastProgramType program_number,const SBlastFilterOptions * filter_options,Blast_Message ** blast_message)441 Int2 SBlastFilterOptionsValidate(EBlastProgramType program_number, const SBlastFilterOptions* filter_options, Blast_Message* *blast_message)
442 {
443        Int2 status = 0;
444 
445        if (filter_options == NULL)
446        {
447            Blast_MessageWrite(blast_message, eBlastSevWarning, kBlastMessageNoContext,
448               "SBlastFilterOptionsValidate: NULL filter_options");
449            return BLASTERR_INVALIDPARAM;
450        }
451 
452        if (filter_options->repeatFilterOptions)
453        {
454            if (program_number != eBlastTypeBlastn &&
455                program_number != eBlastTypeMapping)
456            {
457                if (blast_message)
458                   Blast_MessageWrite(blast_message, eBlastSevError, kBlastMessageNoContext,
459                    "SBlastFilterOptionsValidate: Repeat filtering only supported with blastn");
460                return  BLASTERR_OPTION_PROGRAM_INVALID;
461            }
462            if (filter_options->repeatFilterOptions->database == NULL ||
463                strlen(filter_options->repeatFilterOptions->database) == 0)
464            {
465                if (blast_message)
466                   Blast_MessageWrite(blast_message, eBlastSevError, kBlastMessageNoContext,
467                    "SBlastFilterOptionsValidate: No repeat database specified for repeat filtering");
468                return BLASTERR_INVALIDPARAM;
469            }
470        }
471 
472        if (filter_options->dustOptions)
473        {
474            if (program_number != eBlastTypeBlastn &&
475                program_number != eBlastTypeMapping)
476            {
477                if (blast_message)
478                   Blast_MessageWrite(blast_message, eBlastSevError, kBlastMessageNoContext,
479                    "SBlastFilterOptionsValidate: Dust filtering only supported with blastn");
480                return BLASTERR_OPTION_PROGRAM_INVALID;
481            }
482        }
483 
484        if (filter_options->segOptions)
485        {
486            if (program_number == eBlastTypeBlastn &&
487                program_number != eBlastTypeMapping)
488            {
489                if (blast_message)
490                   Blast_MessageWrite(blast_message, eBlastSevError, kBlastMessageNoContext,
491                    "SBlastFilterOptionsValidate: SEG filtering is not supported with blastn");
492                return BLASTERR_OPTION_PROGRAM_INVALID;
493            }
494        }
495 
496        return status;
497 }
498 
499 
500 QuerySetUpOptions*
BlastQuerySetUpOptionsFree(QuerySetUpOptions * options)501 BlastQuerySetUpOptionsFree(QuerySetUpOptions* options)
502 
503 {
504    if (options)
505    {
506        sfree(options->filter_string);
507        options->filtering_options = SBlastFilterOptionsFree(options->filtering_options);
508        sfree(options);
509    }
510    return NULL;
511 }
512 
513 Int2
BlastQuerySetUpOptionsNew(QuerySetUpOptions ** options)514 BlastQuerySetUpOptionsNew(QuerySetUpOptions* *options)
515 {
516    Int2 status = 0;
517 
518    if (options == NULL)
519       return BLASTERR_INVALIDPARAM;
520 
521    *options = (QuerySetUpOptions*) calloc(1, sizeof(QuerySetUpOptions));
522 
523    if (*options == NULL)
524       return BLASTERR_MEMORY;
525 
526    (*options)->genetic_code = BLAST_GENETIC_CODE;
527 
528    /** @todo the code below should be deprecated */
529    status = SBlastFilterOptionsNew(&((*options)->filtering_options), eEmpty);
530 
531    return status;
532 }
533 
BLAST_FillQuerySetUpOptions(QuerySetUpOptions * options,EBlastProgramType program,const char * filter_string,Uint1 strand_option)534 Int2 BLAST_FillQuerySetUpOptions(QuerySetUpOptions* options,
535         EBlastProgramType program, const char *filter_string, Uint1 strand_option)
536 {
537    Int2 status = 0;
538 
539    if (options == NULL)
540       return BLASTERR_INVALIDPARAM;
541 
542    if (strand_option &&
543        (program == eBlastTypeBlastn || program == eBlastTypePhiBlastn ||
544         program == eBlastTypeBlastx || program == eBlastTypeTblastx ||
545         program == eBlastTypeMapping)) {
546       options->strand_option = strand_option;
547    }
548 
549    if (filter_string) {
550        /* Free whatever filter string has been set before. */
551        sfree(options->filter_string);
552        /* Free whatever filtering options have been set. */
553        options->filtering_options =  SBlastFilterOptionsFree(options->filtering_options);
554        /* Parse the filter_string for options, do not save the string. */
555        status = BlastFilteringOptionsFromString(program, filter_string,
556           &options->filtering_options, NULL);
557    }
558    return status;
559 }
560 
561 BlastInitialWordOptions*
BlastInitialWordOptionsFree(BlastInitialWordOptions * options)562 BlastInitialWordOptionsFree(BlastInitialWordOptions* options)
563 
564 {
565 
566 	sfree(options);
567 
568 	return NULL;
569 }
570 
571 
572 Int2
BlastInitialWordOptionsNew(EBlastProgramType program,BlastInitialWordOptions ** options)573 BlastInitialWordOptionsNew(EBlastProgramType program,
574    BlastInitialWordOptions* *options)
575 {
576    *options =
577       (BlastInitialWordOptions*) calloc(1, sizeof(BlastInitialWordOptions));
578    if (*options == NULL)
579       return BLASTERR_MEMORY;
580 
581    if (/*program != eBlastTypeBlastn &&
582          program != eBlastTypePhiBlastn */
583        !Blast_ProgramIsNucleotide(program)) {	/* protein-protein options. */
584       (*options)->window_size = BLAST_WINDOW_SIZE_PROT;
585       (*options)->x_dropoff = BLAST_UNGAPPED_X_DROPOFF_PROT;
586       (*options)->gap_trigger = BLAST_GAP_TRIGGER_PROT;
587    } else {
588       (*options)->window_size = BLAST_WINDOW_SIZE_NUCL;
589       (*options)->scan_range =  BLAST_SCAN_RANGE_NUCL;
590       (*options)->gap_trigger = BLAST_GAP_TRIGGER_NUCL;
591       (*options)->x_dropoff = BLAST_UNGAPPED_X_DROPOFF_NUCL;
592    }
593 
594    (*options)->program_number = program;
595 
596    return 0;
597 }
598 
599 
600 Int2
BlastInitialWordOptionsValidate(EBlastProgramType program_number,const BlastInitialWordOptions * options,Blast_Message ** blast_msg)601 BlastInitialWordOptionsValidate(EBlastProgramType program_number,
602    const BlastInitialWordOptions* options,
603    Blast_Message* *blast_msg)
604 {
605 
606    ASSERT(options);
607 
608    /* PHI-BLAST has no ungapped extension phase.  Megablast may not have it,
609     but generally does now. */
610    if (program_number != eBlastTypeBlastn  &&
611        program_number != eBlastTypeMapping &&
612        (!Blast_ProgramIsPhiBlast(program_number)) &&
613        options->x_dropoff <= 0.0)
614    {
615       Blast_MessageWrite(blast_msg, eBlastSevError, kBlastMessageNoContext,
616                             "x_dropoff must be greater than zero");
617          return BLASTERR_OPTION_VALUE_INVALID;
618    }
619 
620    if (program_number == eBlastTypeBlastn &&
621        options->scan_range && !options->window_size)
622    {
623       Blast_MessageWrite(blast_msg, eBlastSevError, kBlastMessageNoContext,
624                             "off_diagonal_range is only useful in 2-hit algorithm");
625          return BLASTERR_OPTION_VALUE_INVALID;
626    }
627 
628 
629    return 0;
630 }
631 
632 
633 Int2
BLAST_FillInitialWordOptions(BlastInitialWordOptions * options,EBlastProgramType program,Int4 window_size,double xdrop_ungapped)634 BLAST_FillInitialWordOptions(BlastInitialWordOptions* options,
635                 EBlastProgramType program, Int4 window_size,
636                 double xdrop_ungapped)
637 {
638    if (!options)
639       return BLASTERR_INVALIDPARAM;
640 
641    if (window_size != 0)
642       options->window_size = window_size;
643    if (xdrop_ungapped != 0)
644       options->x_dropoff = xdrop_ungapped;
645 
646    return 0;
647 }
648 
649 BlastExtensionOptions*
BlastExtensionOptionsFree(BlastExtensionOptions * options)650 BlastExtensionOptionsFree(BlastExtensionOptions* options)
651 
652 {
653 
654 	sfree(options);
655 
656 	return NULL;
657 }
658 
659 Int2
BlastExtensionOptionsNew(EBlastProgramType program,BlastExtensionOptions ** options,Boolean gapped)660 BlastExtensionOptionsNew(EBlastProgramType program, BlastExtensionOptions* *options, Boolean gapped)
661 
662 {
663 	*options = (BlastExtensionOptions*)
664            calloc(1, sizeof(BlastExtensionOptions));
665 
666 	if (*options == NULL)
667 		return BLASTERR_MEMORY;
668 
669 	if (/* program != eBlastTypeBlastn &&
670            program != eBlastTypePhiBlastn*/
671         !Blast_ProgramIsNucleotide(program)) /* protein-protein options. */
672 	{
673 		(*options)->gap_x_dropoff = BLAST_GAP_X_DROPOFF_PROT;
674 		(*options)->gap_x_dropoff_final =
675                    BLAST_GAP_X_DROPOFF_FINAL_PROT;
676     } else {
677         (*options)->gap_x_dropoff = BLAST_GAP_X_DROPOFF_NUCL;
678         (*options)->gap_x_dropoff_final = BLAST_GAP_X_DROPOFF_FINAL_NUCL;
679     }
680 
681     (*options)->ePrelimGapExt = eDynProgScoreOnly;
682     (*options)->eTbackExt = eDynProgTbck;
683     (*options)->compositionBasedStats = eNoCompositionBasedStats;
684 
685     /** @todo how to determine this for PSI-BLAST bootstrap run (i.e. when
686      * program is blastp? */
687     if (gapped && (Blast_QueryIsPssm(program) && ! Blast_SubjectIsTranslated(program))) {
688         (*options)->compositionBasedStats = eCompositionBasedStats;
689     }
690 
691     (*options)->max_mismatches = 5;
692     (*options)->mismatch_window = 10;
693     (*options)->program_number = program;
694 
695 	return 0;
696 }
697 
698 Int2
BLAST_FillExtensionOptions(BlastExtensionOptions * options,EBlastProgramType program,Int4 greedy,double x_dropoff,double x_dropoff_final)699 BLAST_FillExtensionOptions(BlastExtensionOptions* options,
700    EBlastProgramType program, Int4 greedy, double x_dropoff,
701    double x_dropoff_final)
702 {
703    if (!options)
704       return BLASTERR_INVALIDPARAM;
705 
706    if (/*program == eBlastTypeBlastn || program == eBlastTypePhiBlastn*/
707        Blast_ProgramIsNucleotide(program)) {
708       if (greedy) {
709          options->gap_x_dropoff = BLAST_GAP_X_DROPOFF_GREEDY;
710          options->gap_x_dropoff_final = BLAST_GAP_X_DROPOFF_FINAL_NUCL;
711          options->ePrelimGapExt = eGreedyScoreOnly;
712          options->eTbackExt = eGreedyTbck;
713       } else {
714          options->gap_x_dropoff = BLAST_GAP_X_DROPOFF_NUCL;
715          options->gap_x_dropoff_final = BLAST_GAP_X_DROPOFF_FINAL_NUCL;
716          options->ePrelimGapExt = eDynProgScoreOnly;
717          options->eTbackExt = eDynProgTbck;
718       }
719    }
720 
721    if (Blast_QueryIsPssm(program) && ! Blast_SubjectIsTranslated(program)) {
722        options->compositionBasedStats = eCompositionBasedStats;
723    }
724 
725    if (x_dropoff)
726       options->gap_x_dropoff = x_dropoff;
727    if (x_dropoff_final) {
728       options->gap_x_dropoff_final = x_dropoff_final;
729    } else {
730       /* Final X-dropoff can't be smaller than preliminary X-dropoff */
731       options->gap_x_dropoff_final =
732          MAX(options->gap_x_dropoff_final, x_dropoff);
733    }
734 
735    return 0;
736 
737 }
738 
739 Int2
BlastExtensionOptionsValidate(EBlastProgramType program_number,const BlastExtensionOptions * options,Blast_Message ** blast_msg)740 BlastExtensionOptionsValidate(EBlastProgramType program_number,
741    const BlastExtensionOptions* options, Blast_Message* *blast_msg)
742 
743 {
744 	if (options == NULL)
745 		return  BLASTERR_INVALIDPARAM;
746 
747 	if (program_number != eBlastTypeBlastn &&
748         program_number != eBlastTypeMapping &&
749             (options->ePrelimGapExt == eGreedyScoreOnly ||
750              options->eTbackExt == eGreedyTbck))
751 	{
752 		Blast_MessageWrite(blast_msg, eBlastSevWarning,
753                                    kBlastMessageNoContext,
754                             "Greedy extension only supported for BLASTN");
755 			return BLASTERR_OPTION_PROGRAM_INVALID;
756 	}
757 
758         if ((options->ePrelimGapExt == eSmithWatermanScoreOnly &&
759              options->eTbackExt != eSmithWatermanTbckFull) ||
760             (options->ePrelimGapExt != eSmithWatermanScoreOnly &&
761              options->eTbackExt == eSmithWatermanTbckFull))
762 	{
763 		Blast_MessageWrite(blast_msg, eBlastSevWarning,
764                                    kBlastMessageNoContext,
765                            "Score-only and traceback Smith-Waterman must "
766                            "both be specified");
767 		return BLASTERR_OPTION_VALUE_INVALID;
768 	}
769 
770 	return 0;
771 }
772 
773 BlastScoringOptions*
BlastScoringOptionsFree(BlastScoringOptions * options)774 BlastScoringOptionsFree(BlastScoringOptions* options)
775 
776 {
777 	if (options == NULL)
778 		return NULL;
779 
780 	sfree(options->matrix);
781    sfree(options->matrix_path);
782 	sfree(options);
783 
784 	return NULL;
785 }
786 
787 Int2
BlastScoringOptionsNew(EBlastProgramType program_number,BlastScoringOptions ** options)788 BlastScoringOptionsNew(EBlastProgramType program_number, BlastScoringOptions* *options)
789 {
790    *options = (BlastScoringOptions*) calloc(1, sizeof(BlastScoringOptions));
791 
792    if (*options == NULL)
793       return BLASTERR_INVALIDPARAM;
794 
795    if (/*program_number != eBlastTypeBlastn &&
796          program_number != eBlastTypePhiBlastn*/
797        !Blast_ProgramIsNucleotide(program_number)) {/*protein-protein options.*/
798       (*options)->shift_pen = INT2_MAX;
799       (*options)->is_ooframe = FALSE;
800       (*options)->gap_open = BLAST_GAP_OPEN_PROT;
801       (*options)->gap_extend = BLAST_GAP_EXTN_PROT;
802       (*options)->matrix = strdup(BLAST_DEFAULT_MATRIX);
803    } else {	/* nucleotide-nucleotide options. */
804       (*options)->penalty = BLAST_PENALTY;
805       (*options)->reward = BLAST_REWARD;
806       /* This is correct except when greedy extension is used. In that case
807          these values would have to be reset. */
808       (*options)->gap_open = BLAST_GAP_OPEN_NUCL;
809       (*options)->gap_extend = BLAST_GAP_EXTN_NUCL;
810    }
811    if (program_number != eBlastTypeTblastx) {
812        (*options)->gapped_calculation = TRUE;
813    }
814    (*options)->program_number = program_number;
815    /* By default cross_match-like complexity adjusted scoring is
816       turned off.  RMBlastN is currently the only program to use this. -RMH */
817    (*options)->complexity_adjusted_scoring = FALSE;
818 
819    return 0;
820 }
821 
822 Int2
BLAST_FillScoringOptions(BlastScoringOptions * options,EBlastProgramType program_number,Boolean greedy_extension,Int4 penalty,Int4 reward,const char * matrix,Int4 gap_open,Int4 gap_extend)823 BLAST_FillScoringOptions(BlastScoringOptions* options,
824    EBlastProgramType program_number, Boolean greedy_extension, Int4 penalty, Int4 reward,
825    const char *matrix, Int4 gap_open, Int4 gap_extend)
826 {
827    if (!options)
828       return BLASTERR_INVALIDPARAM;
829 
830    if (/*program_number != eBlastTypeBlastn &&
831          program_number != eBlastTypePhiBlastn*/
832        !Blast_ProgramIsNucleotide(program_number)) {/* protein-protein options. */
833       /* If matrix name is not provided, keep the default "BLOSUM62" value filled in
834          BlastScoringOptionsNew, otherwise reset it. */
835       if (matrix)
836           BlastScoringOptionsSetMatrix(options, matrix);
837    } else {	/* nucleotide-nucleotide options. */
838       if (penalty)
839          options->penalty = penalty;
840       if (reward)
841          options->reward = reward;
842 
843       if (greedy_extension) {
844          options->gap_open = BLAST_GAP_OPEN_MEGABLAST;
845          options->gap_extend = BLAST_GAP_EXTN_MEGABLAST;
846       }	else {
847          options->gap_open = BLAST_GAP_OPEN_NUCL;
848          options->gap_extend = BLAST_GAP_EXTN_NUCL;
849       }
850    }
851    if (gap_open >= 0)
852       options->gap_open = gap_open;
853    if (gap_extend >= 0)
854       options->gap_extend = gap_extend;
855 
856    options->program_number = program_number;
857 
858    return 0;
859 }
860 
861 Int2
BlastScoringOptionsValidate(EBlastProgramType program_number,const BlastScoringOptions * options,Blast_Message ** blast_msg)862 BlastScoringOptionsValidate(EBlastProgramType program_number,
863    const BlastScoringOptions* options, Blast_Message* *blast_msg)
864 
865 {
866 	if (options == NULL)
867 		return BLASTERR_INVALIDPARAM;
868 
869         if (program_number == eBlastTypeTblastx && options->gapped_calculation)
870         {
871             Blast_MessageWrite(blast_msg, eBlastSevError, kBlastMessageNoContext,
872                "Gapped search is not allowed for tblastx");
873 		return BLASTERR_OPTION_PROGRAM_INVALID;
874         }
875 
876         if (/*program_number == eBlastTypeBlastn || program_number == eBlastTypePhiBlastn*/
877             Blast_ProgramIsNucleotide(program_number))
878 	{
879            // A penalty/reward of 0/0 is a signal that this is rmblastn
880            // which allows specification of penalties as positive integers.
881            if ( ! ( options->penalty == 0 && options->reward == 0 ) )
882            {
883 		if (options->penalty >= 0)
884 		{
885 			Blast_MessageWrite(blast_msg, eBlastSevWarning, kBlastMessageNoContext,
886                             "BLASTN penalty must be negative");
887 			return BLASTERR_OPTION_VALUE_INVALID;
888 		}
889 
890         /* !!! this is temporary until there is jumper or mapping options handle */
891         if (0 && options->gapped_calculation &&
892             !Blast_ProgramIsMapping(program_number) &&
893             !BLAST_CheckRewardPenaltyScores(options->reward, options->penalty))
894                 {
895 			Blast_MessageWrite(blast_msg, eBlastSevWarning, kBlastMessageNoContext,
896                             "BLASTN reward/penalty combination not supported for gapped search");
897 			return BLASTERR_OPTION_VALUE_INVALID;
898                 }
899              }
900 
901              if (options->gapped_calculation && options->gap_open > 0 && options->gap_extend == 0)
902              {
903                      Blast_MessageWrite(blast_msg, eBlastSevWarning, kBlastMessageNoContext,
904                         "BLASTN gap extension penalty cannot be 0");
905                      return BLASTERR_OPTION_VALUE_INVALID;
906              }
907 	}
908 	else
909 	{
910                 if (options->gapped_calculation && !Blast_ProgramIsRpsBlast(program_number))
911                 {
912                     Int2 status=0;
913                     Boolean std_matrix_only =
914                         (program_number != eBlastTypeBlastp &&
915                          program_number != eBlastTypeTblastn);
916                     if ((status=Blast_KarlinBlkGappedLoadFromTables(NULL, options->gap_open,
917                           options->gap_extend, options->matrix, std_matrix_only)) != 0)
918                      {
919 			if (status == 1)
920 			{
921 				char* buffer;
922 
923 				buffer = BLAST_PrintMatrixMessage(options->matrix,
924                                                   std_matrix_only);
925 
926                                 Blast_MessageWrite(blast_msg, eBlastSevError, kBlastMessageNoContext, buffer);
927 				sfree(buffer);
928 				return BLASTERR_OPTION_VALUE_INVALID;
929 
930 			}
931 			else if (status == 2)
932 			{
933 				char* buffer;
934 
935 				buffer = BLAST_PrintAllowedValues(options->matrix,
936                         options->gap_open, options->gap_extend);
937                                 Blast_MessageWrite(blast_msg, eBlastSevError, kBlastMessageNoContext, buffer);
938 				sfree(buffer);
939 				return BLASTERR_OPTION_VALUE_INVALID;
940 			}
941                     }
942 	       }
943 	}
944 
945 	if (program_number != eBlastTypeBlastx && program_number != eBlastTypeTblastn && options->is_ooframe)
946 	{
947             Blast_MessageWrite(blast_msg, eBlastSevWarning, kBlastMessageNoContext,
948                "Out-of-frame only permitted for blastx and tblastn");
949             return  BLASTERR_OPTION_PROGRAM_INVALID;
950 	}
951 
952 	return 0;
953 }
954 
955 Int2
BlastScoringOptionsDup(BlastScoringOptions ** new_opt,const BlastScoringOptions * old_opt)956 BlastScoringOptionsDup(BlastScoringOptions* *new_opt, const BlastScoringOptions* old_opt)
957 {
958     if (old_opt == NULL || new_opt == NULL)
959        return BLASTERR_INVALIDPARAM;
960 
961     *new_opt = (BlastScoringOptions*) BlastMemDup(old_opt, sizeof(BlastScoringOptions));
962     if (*new_opt == NULL)
963        return BLASTERR_MEMORY;
964 
965     if (old_opt->matrix)
966        (*new_opt)->matrix = strdup(old_opt->matrix);
967 
968     if (old_opt->matrix_path)
969        (*new_opt)->matrix_path = strdup(old_opt->matrix_path);
970 
971     return 0;
972 }
973 
BlastScoringOptionsSetMatrix(BlastScoringOptions * opts,const char * matrix_name)974 Int2 BlastScoringOptionsSetMatrix(BlastScoringOptions* opts,
975                                   const char* matrix_name)
976 {
977     Uint4 i;
978 
979     if (matrix_name) {
980         sfree(opts->matrix);
981         opts->matrix = strdup(matrix_name);
982         /* Make it all upper case */
983         for (i=0; i<strlen(opts->matrix); ++i)
984             opts->matrix[i] = toupper((unsigned char) opts->matrix[i]);
985     }
986     return 0;
987 }
988 
989 BlastEffectiveLengthsOptions*
BlastEffectiveLengthsOptionsFree(BlastEffectiveLengthsOptions * options)990 BlastEffectiveLengthsOptionsFree(BlastEffectiveLengthsOptions* options)
991 
992 {
993    if (options == NULL)
994       return NULL;
995 
996    sfree(options->searchsp_eff);
997    sfree(options);
998    return NULL;
999 }
1000 
1001 
1002 Int2
BlastEffectiveLengthsOptionsNew(BlastEffectiveLengthsOptions ** options)1003 BlastEffectiveLengthsOptionsNew(BlastEffectiveLengthsOptions* *options)
1004 
1005 {
1006     if (options == NULL) {
1007         return BLASTERR_INVALIDPARAM;
1008     }
1009 
1010     *options = (BlastEffectiveLengthsOptions*)
1011        calloc(1, sizeof(BlastEffectiveLengthsOptions));
1012 
1013     if (*options == NULL)
1014        return BLASTERR_MEMORY;
1015 
1016     return 0;
1017 }
1018 
1019 Boolean
BlastEffectiveLengthsOptions_IsSearchSpaceSet(const BlastEffectiveLengthsOptions * options)1020 BlastEffectiveLengthsOptions_IsSearchSpaceSet(const
1021                                               BlastEffectiveLengthsOptions*
1022                                               options)
1023 {
1024     int i;
1025     if ( !options || options->searchsp_eff == NULL) {
1026         return FALSE;
1027     }
1028 
1029     for (i = 0; i < options->num_searchspaces; i++) {
1030         if (options->searchsp_eff[i] != 0) {
1031             return TRUE;
1032         }
1033     }
1034     return FALSE;
1035 }
1036 
1037 Int2
BLAST_FillEffectiveLengthsOptions(BlastEffectiveLengthsOptions * options,Int4 dbseq_num,Int8 db_length,Int8 * searchsp_eff,Int4 num_searchsp)1038 BLAST_FillEffectiveLengthsOptions(BlastEffectiveLengthsOptions* options,
1039    Int4 dbseq_num, Int8 db_length, Int8* searchsp_eff, Int4 num_searchsp)
1040 {
1041    Int4 index;
1042    if (!options)
1043       return BLASTERR_INVALIDPARAM;
1044 
1045    if (num_searchsp > options->num_searchspaces) {
1046        options->num_searchspaces = num_searchsp;
1047        options->searchsp_eff = (Int8 *)realloc(options->searchsp_eff,
1048                                                num_searchsp * sizeof(Int8));
1049        if (options->searchsp_eff == NULL)
1050            return BLASTERR_MEMORY;
1051    }
1052 
1053    for (index = 0; index < options->num_searchspaces; index++)
1054       options->searchsp_eff[index] = searchsp_eff[index];
1055 
1056    options->dbseq_num = dbseq_num;
1057    options->db_length = db_length;
1058 
1059    return 0;
1060 }
1061 
1062 LookupTableOptions*
LookupTableOptionsFree(LookupTableOptions * options)1063 LookupTableOptionsFree(LookupTableOptions* options)
1064 
1065 {
1066 
1067       if (options == NULL)
1068           return NULL;
1069 
1070       sfree(options->phi_pattern);
1071 
1072 	sfree(options);
1073 	return NULL;
1074 }
1075 
1076 Int2
LookupTableOptionsNew(EBlastProgramType program_number,LookupTableOptions ** options)1077 LookupTableOptionsNew(EBlastProgramType program_number, LookupTableOptions* *options)
1078 {
1079    *options = (LookupTableOptions*) calloc(1, sizeof(LookupTableOptions));
1080 
1081    if (*options == NULL)
1082       return BLASTERR_INVALIDPARAM;
1083 
1084    switch (program_number) {
1085    case eBlastTypeMapping:
1086        (*options)->max_db_word_count = MAX_DB_WORD_COUNT_MAPPER;
1087    case eBlastTypeBlastn:
1088        /* Blastn default is megablast. */
1089        (*options)->word_size = BLAST_WORDSIZE_MEGABLAST;
1090        (*options)->lut_type = eMBLookupTable;
1091        break;
1092    case eBlastTypeRpsBlast: case eBlastTypeRpsTblastn:
1093        (*options)->word_size = BLAST_WORDSIZE_PROT;
1094        (*options)->lut_type = eRPSLookupTable;
1095 
1096        if (program_number == eBlastTypeRpsBlast)
1097            (*options)->threshold = BLAST_WORD_THRESHOLD_BLASTP;
1098        else
1099            (*options)->threshold = BLAST_WORD_THRESHOLD_TBLASTN;
1100        break;
1101    case eBlastTypePhiBlastn:
1102        (*options)->lut_type = ePhiNaLookupTable;
1103        break;
1104    case eBlastTypePhiBlastp:
1105        (*options)->lut_type = ePhiLookupTable;
1106        break;
1107    default:
1108        (*options)->word_size = BLAST_WORDSIZE_PROT;
1109        (*options)->lut_type = eAaLookupTable;
1110 
1111        if (program_number == eBlastTypeBlastp)
1112            (*options)->threshold = BLAST_WORD_THRESHOLD_BLASTP;
1113        else if (program_number == eBlastTypeBlastx)
1114            (*options)->threshold = BLAST_WORD_THRESHOLD_BLASTX;
1115        else if (program_number == eBlastTypeTblastn)
1116            (*options)->threshold = BLAST_WORD_THRESHOLD_TBLASTN;
1117        else if (program_number == eBlastTypeTblastx)
1118            (*options)->threshold = BLAST_WORD_THRESHOLD_TBLASTX;
1119        break;
1120    }
1121 
1122    (*options)->program_number = program_number;
1123    (*options)->stride = 0;
1124 
1125    return 0;
1126 }
1127 
1128 Int2
BLAST_FillLookupTableOptions(LookupTableOptions * options,EBlastProgramType program_number,Boolean is_megablast,double threshold,Int4 word_size)1129 BLAST_FillLookupTableOptions(LookupTableOptions* options,
1130    EBlastProgramType program_number, Boolean is_megablast,
1131    double threshold, Int4 word_size)
1132 {
1133    if (!options)
1134       return BLASTERR_INVALIDPARAM;
1135 
1136    if (program_number == eBlastTypeBlastn) {
1137 
1138       if (is_megablast)	{
1139          options->lut_type = eMBLookupTable;
1140          options->word_size = BLAST_WORDSIZE_MEGABLAST;
1141       }	else {
1142          options->lut_type = eNaLookupTable;
1143          options->word_size = BLAST_WORDSIZE_NUCL;
1144       }
1145    } else if (program_number == eBlastTypeMapping) {
1146        options->lut_type = eNaHashLookupTable;
1147        options->word_size = BLAST_WORDSIZE_MAPPER;
1148        options->max_db_word_count = MAX_DB_WORD_COUNT_MAPPER;
1149    } else {
1150       options->lut_type = eAaLookupTable;
1151    }
1152 
1153    /* if the supplied threshold is negative, disable neighboring words */
1154    if (threshold < 0)
1155       options->threshold = 0;
1156 
1157    /* if the supplied threshold is > 0, use it otherwise, use the default */
1158    if (threshold > 0)
1159       options->threshold = threshold;
1160 
1161    if (Blast_ProgramIsRpsBlast(program_number))
1162       options->lut_type = eRPSLookupTable;
1163    if (word_size)
1164       options->word_size = word_size;
1165    if ((program_number == eBlastTypeTblastn ||
1166         program_number == eBlastTypeBlastp ||
1167         program_number == eBlastTypeBlastx) &&
1168        word_size > 5)
1169        options->lut_type = eCompressedAaLookupTable;
1170 
1171    return 0;
1172 }
1173 
BLAST_GetSuggestedThreshold(EBlastProgramType program_number,const char * matrixName,double * threshold)1174 Int2 BLAST_GetSuggestedThreshold(EBlastProgramType program_number, const char* matrixName, double* threshold)
1175 {
1176 
1177     const double kB62_threshold = 11;
1178 
1179     if (program_number == eBlastTypeBlastn ||
1180         program_number == eBlastTypeMapping)
1181       return 0;
1182 
1183     if (matrixName == NULL)
1184       return BLASTERR_INVALIDPARAM;
1185 
1186     if(strcasecmp(matrixName, "BLOSUM62") == 0)
1187         *threshold = kB62_threshold;
1188     else if(strcasecmp(matrixName, "BLOSUM45") == 0)
1189         *threshold = 14;
1190     else if(strcasecmp(matrixName, "BLOSUM62_20") == 0)
1191         *threshold = 100;
1192     else if(strcasecmp(matrixName, "BLOSUM80") == 0)
1193         *threshold = 12;
1194     else if(strcasecmp(matrixName, "PAM30") == 0)
1195         *threshold = 16;
1196     else if(strcasecmp(matrixName, "PAM70") == 0)
1197         *threshold = 14;
1198     else if(strcasecmp(matrixName, "IDENTITY") == 0)
1199         *threshold = 27;
1200     else
1201         *threshold = kB62_threshold;
1202 
1203     if (Blast_SubjectIsTranslated(program_number) == TRUE)
1204         *threshold += 2;  /* Covers tblastn, tblastx, psi-tblastn rpstblastn. */
1205     else if (Blast_QueryIsTranslated(program_number) == TRUE)
1206         *threshold += 1;
1207 
1208     return 0;
1209 }
1210 
BLAST_GetSuggestedWindowSize(EBlastProgramType program_number,const char * matrixName,Int4 * window_size)1211 Int2 BLAST_GetSuggestedWindowSize(EBlastProgramType program_number, const char* matrixName, Int4* window_size)
1212 {
1213     const Int4 kB62_windowsize = 40;
1214 
1215     if (program_number == eBlastTypeBlastn ||
1216         program_number == eBlastTypeMapping)
1217       return 0;
1218 
1219     if (matrixName == NULL)
1220       return BLASTERR_INVALIDPARAM;
1221 
1222     if(strcasecmp(matrixName, "BLOSUM62") == 0)
1223         *window_size = kB62_windowsize;
1224     else if(strcasecmp(matrixName, "BLOSUM45") == 0)
1225         *window_size = 60;
1226     else if(strcasecmp(matrixName, "BLOSUM80") == 0)
1227         *window_size = 25;
1228     else if(strcasecmp(matrixName, "PAM30") == 0)
1229         *window_size = 15;
1230     else if(strcasecmp(matrixName, "PAM70") == 0)
1231         *window_size = 20;
1232     else
1233         *window_size = kB62_windowsize;
1234 
1235     return 0;
1236 }
1237 
1238 /** Validate options for the discontiguous word megablast
1239  * Word size must be 11 or 12; template length 16, 18 or 21;
1240  * template type 0, 1 or 2.
1241  * @param word_size Word size option [in]
1242  * @param template_length Discontiguous template length [in]
1243  * @param template_type Discontiguous template type [in]
1244  * @param blast_msg Used for storing error messages [in][out]
1245  * @return TRUE if options combination valid.
1246  */
1247 static Boolean
s_DiscWordOptionsValidate(Int4 word_size,Uint1 template_length,Uint1 template_type,Blast_Message ** blast_msg)1248 s_DiscWordOptionsValidate(Int4 word_size, Uint1 template_length,
1249                           Uint1 template_type,
1250                           Blast_Message** blast_msg)
1251 {
1252    if (template_length == 0)
1253       return TRUE;
1254 
1255 
1256    if (word_size != 11 && word_size != 12) {
1257       Blast_MessageWrite(blast_msg, eBlastSevError, kBlastMessageNoContext,
1258                          "Invalid discontiguous template parameters: word "
1259                          "size must be either 11 or 12");
1260       return FALSE;
1261    }
1262 
1263    if (template_length != 16 && template_length != 18 &&
1264        template_length != 21) {
1265       Blast_MessageWrite(blast_msg, eBlastSevError, kBlastMessageNoContext,
1266                          "Invalid discontiguous template parameters: "
1267                          "template length must be 16, 18, or 21");
1268       return FALSE;
1269    }
1270 
1271    if (template_type > 2) {
1272      /* should never fail coming from the C++ APIs as we represent these as
1273       * strings */
1274       Blast_MessageWrite(blast_msg, eBlastSevError, kBlastMessageNoContext,
1275                          "Invalid discontiguous template parameters: "
1276                          "template type must be 0, 1, or 2");
1277       return FALSE;
1278    }
1279 
1280    return TRUE;
1281 }
1282 
1283 Int2
LookupTableOptionsValidate(EBlastProgramType program_number,const LookupTableOptions * options,Blast_Message ** blast_msg)1284 LookupTableOptionsValidate(EBlastProgramType program_number,
1285    const LookupTableOptions* options, Blast_Message* *blast_msg)
1286 
1287 {
1288    const Boolean kPhiBlast = Blast_ProgramIsPhiBlast(program_number);
1289 
1290     if (options == NULL)
1291         return BLASTERR_INVALIDPARAM;
1292 
1293     if (options->phi_pattern && !kPhiBlast) {
1294         Blast_MessageWrite(blast_msg, eBlastSevError, kBlastMessageNoContext,
1295             "PHI pattern can be specified only for blastp and blastn");
1296         return BLASTERR_OPTION_PROGRAM_INVALID;
1297     }
1298 
1299     /* For PHI BLAST, the subsequent word size tests are not needed. */
1300     if (kPhiBlast)
1301         return 0;
1302 
1303     if (program_number != eBlastTypeBlastn &&
1304         program_number != eBlastTypeMapping &&
1305         (!Blast_ProgramIsRpsBlast(program_number)) &&
1306         options->threshold <= 0)
1307     {
1308         Blast_MessageWrite(blast_msg, eBlastSevError, kBlastMessageNoContext,
1309                          "Non-zero threshold required");
1310         return BLASTERR_OPTION_VALUE_INVALID;
1311     }
1312 
1313     if (options->word_size <= 0)
1314     {
1315         if ( !Blast_ProgramIsRpsBlast(program_number)) {
1316             Blast_MessageWrite(blast_msg, eBlastSevError, kBlastMessageNoContext,
1317                                      "Word-size must be greater than zero");
1318             return BLASTERR_OPTION_VALUE_INVALID;
1319         }
1320     } else if (/*program_number == eBlastTypeBlastn*/
1321                Blast_ProgramIsNucleotide(program_number) &&
1322                !Blast_QueryIsPattern(program_number) && options->word_size < 4)
1323     {
1324         Blast_MessageWrite(blast_msg, eBlastSevError, kBlastMessageNoContext,
1325                   "Word-size must be 4 or greater for nucleotide comparison");
1326         return BLASTERR_OPTION_VALUE_INVALID;
1327     } else if (program_number != eBlastTypeBlastn &&
1328                program_number != eBlastTypeMapping && options->word_size > 5)
1329     {
1330         if (program_number == eBlastTypeBlastp ||
1331             program_number == eBlastTypeTblastn ||
1332             program_number == eBlastTypeBlastx)
1333         {
1334             if (options->word_size > 7) {
1335                 Blast_MessageWrite(blast_msg, eBlastSevError,
1336                                    kBlastMessageNoContext,
1337                                    "Word-size must be less than "
1338                                    "8 for a tblastn, blastp or blastx search");
1339                 return BLASTERR_OPTION_VALUE_INVALID;
1340             }
1341         }
1342         else {
1343             Blast_MessageWrite(blast_msg, eBlastSevError,
1344                                kBlastMessageNoContext,
1345                                "Word-size must be less "
1346                                "than 6 for protein comparison");
1347             return BLASTERR_OPTION_VALUE_INVALID;
1348         }
1349     }
1350 
1351     if (program_number != eBlastTypeBlastn &&
1352         program_number != eBlastTypeMapping &&
1353        options->lut_type == eMBLookupTable)
1354     {
1355         Blast_MessageWrite(blast_msg, eBlastSevError, kBlastMessageNoContext,
1356                          "Megablast lookup table only supported with blastn");
1357         return BLASTERR_OPTION_PROGRAM_INVALID;
1358     }
1359 
1360     if (program_number == eBlastTypeBlastp ||
1361         program_number == eBlastTypeTblastn ||
1362         program_number == eBlastTypeBlastx)
1363     {
1364         if (options->word_size > 5 &&
1365             options->lut_type != eCompressedAaLookupTable) {
1366            Blast_MessageWrite(blast_msg, eBlastSevError,
1367                               kBlastMessageNoContext,
1368                               "Blastp, Blastx or Tblastn with word size"
1369                               " > 5 requires a "
1370                               "compressed alphabet lookup table");
1371            return BLASTERR_OPTION_VALUE_INVALID;
1372         }
1373         else if (options->lut_type == eCompressedAaLookupTable &&
1374                  options->word_size != 6 && options->word_size != 7) {
1375            Blast_MessageWrite(blast_msg, eBlastSevError, kBlastMessageNoContext,
1376                          "Compressed alphabet lookup table requires "
1377                          "word size 6 or 7");
1378            return BLASTERR_OPTION_VALUE_INVALID;
1379         }
1380     }
1381 
1382     if (/*program_number == eBlastTypeBlastn &&*/
1383         Blast_ProgramIsNucleotide(program_number) &&
1384         !Blast_QueryIsPattern(program_number) &&
1385         options->mb_template_length > 0) {
1386       if (!s_DiscWordOptionsValidate(options->word_size,
1387               options->mb_template_length,
1388               options->mb_template_type,
1389               blast_msg)) {
1390          return BLASTERR_OPTION_VALUE_INVALID;
1391       } else if (options->lut_type != eMBLookupTable) {
1392          Blast_MessageWrite(blast_msg, eBlastSevError, kBlastMessageNoContext,
1393             "Invalid lookup table type for discontiguous Mega BLAST");
1394          return BLASTERR_OPTION_VALUE_INVALID;
1395       }
1396    }
1397 
1398    if (!Blast_ProgramIsNucleotide(program_number) && options->db_filter) {
1399        Blast_MessageWrite(blast_msg, eBlastSevError, kBlastMessageNoContext,
1400                           "The limit_lookup option can only be used for "
1401                           "nucleotide searches");
1402        return BLASTERR_OPTION_VALUE_INVALID;
1403    }
1404 
1405    if (options->db_filter && options->word_size < 16) {
1406        Blast_MessageWrite(blast_msg, eBlastSevError, kBlastMessageNoContext,
1407                           "The limit_lookup option can only be used with "
1408                           "word size >= 16");
1409        return BLASTERR_OPTION_VALUE_INVALID;
1410    }
1411 
1412     return 0;
1413 }
1414 
1415 BlastHitSavingOptions*
BlastHitSavingOptionsFree(BlastHitSavingOptions * options)1416 BlastHitSavingOptionsFree(BlastHitSavingOptions* options)
1417 
1418 {
1419     if (options) {
1420         options->hsp_filt_opt = BlastHSPFilteringOptionsFree(options->hsp_filt_opt);
1421     }
1422     sfree(options);
1423     return NULL;
1424 }
1425 
1426 
BlastHitSavingOptionsNew(EBlastProgramType program_number,BlastHitSavingOptions ** options,Boolean gapped_calculation)1427 Int2 BlastHitSavingOptionsNew(EBlastProgramType program_number,
1428         BlastHitSavingOptions** options,
1429         Boolean gapped_calculation)
1430 {
1431    *options = (BlastHitSavingOptions*) calloc(1, sizeof(BlastHitSavingOptions));
1432 
1433    if (*options == NULL)
1434       return BLASTERR_INVALIDPARAM;
1435 
1436    (*options)->hitlist_size = BLAST_HITLIST_SIZE;
1437    (*options)->expect_value = BLAST_EXPECT_VALUE;
1438    (*options)->program_number = program_number;
1439 
1440    // Initialize mask_level parameter -RMH-
1441    (*options)->mask_level = 101;
1442 
1443    /* By default, sum statistics is used for all translated searches
1444     * (except RPS BLAST), and for all ungapped searches.
1445     */
1446    if (program_number == eBlastTypeRpsTblastn) {
1447 	   (*options)->do_sum_stats = FALSE;
1448    } else if (!gapped_calculation ||
1449 	   Blast_QueryIsTranslated(program_number) ||
1450 	   Blast_SubjectIsTranslated(program_number)) {
1451        (*options)->do_sum_stats = TRUE;
1452    } else {
1453        (*options)->do_sum_stats = FALSE;
1454    }
1455 
1456    (*options)->hsp_filt_opt = NULL;
1457 
1458    (*options)->max_edit_distance = INT4_MAX;
1459 
1460    return 0;
1461 
1462 }
1463 
1464 Int2
BLAST_FillHitSavingOptions(BlastHitSavingOptions * options,double evalue,Int4 hitlist_size,Boolean is_gapped,Int4 culling_limit,Int4 min_diag_separation)1465 BLAST_FillHitSavingOptions(BlastHitSavingOptions* options,
1466                            double evalue, Int4 hitlist_size,
1467                            Boolean is_gapped, Int4 culling_limit,
1468                            Int4 min_diag_separation)
1469 {
1470    if (!options)
1471       return BLASTERR_INVALIDPARAM;
1472 
1473    if (hitlist_size)
1474       options->hitlist_size = hitlist_size;
1475    if (evalue)
1476       options->expect_value = evalue;
1477    if (min_diag_separation)
1478       options->min_diag_separation = min_diag_separation;
1479    options->culling_limit = culling_limit;
1480    options->hsp_filt_opt = NULL;
1481    options->max_edit_distance = INT4_MAX;
1482 
1483    return 0;
1484 
1485 }
1486 
1487 Int2
BlastHitSavingOptionsValidate(EBlastProgramType program_number,const BlastHitSavingOptions * options,Blast_Message ** blast_msg)1488 BlastHitSavingOptionsValidate(EBlastProgramType program_number,
1489    const BlastHitSavingOptions* options, Blast_Message* *blast_msg)
1490 {
1491 	if (options == NULL)
1492 		return BLASTERR_INVALIDPARAM;
1493 
1494 	if (options->hitlist_size < 1)
1495 	{
1496 		Blast_MessageWrite(blast_msg, eBlastSevError, kBlastMessageNoContext,
1497                          "No hits are being saved");
1498 		return BLASTERR_OPTION_VALUE_INVALID;
1499 	}
1500 
1501 	if (options->expect_value <= 0.0 && options->cutoff_score <= 0)
1502 	{
1503 		Blast_MessageWrite(blast_msg, eBlastSevError, kBlastMessageNoContext,
1504          "expect value or cutoff score must be greater than zero");
1505 		return BLASTERR_OPTION_VALUE_INVALID;
1506 	}
1507 
1508    if (options->longest_intron != 0 &&
1509        program_number != eBlastTypeTblastn &&
1510        program_number != eBlastTypePsiTblastn &&
1511        program_number != eBlastTypeBlastx &&
1512        program_number != eBlastTypeMapping) {
1513                 Blast_MessageWrite(blast_msg, eBlastSevError, kBlastMessageNoContext,
1514          "Uneven gap linking of HSPs is allowed for blastx, "
1515          "tblastn, and psitblastn only");
1516                 return BLASTERR_OPTION_PROGRAM_INVALID;
1517    }
1518 
1519 	if (options->culling_limit < 0)
1520 	{
1521 		Blast_MessageWrite(blast_msg, eBlastSevError, kBlastMessageNoContext,
1522                     "culling limit must be greater than or equal to zero");
1523 		return BLASTERR_OPTION_VALUE_INVALID;
1524 	}
1525 
1526     if (options->hsp_filt_opt) {
1527         if (BlastHSPFilteringOptionsValidate(options->hsp_filt_opt) != 0) {
1528             Blast_MessageWrite(blast_msg, eBlastSevError, kBlastMessageNoContext,
1529                         "HSP Filtering options invalid");
1530             return BLASTERR_OPTION_VALUE_INVALID;
1531         }
1532     }
1533 
1534 	return 0;
1535 }
1536 
PSIBlastOptionsNew(PSIBlastOptions ** psi_options)1537 Int2 PSIBlastOptionsNew(PSIBlastOptions** psi_options)
1538 {
1539    PSIBlastOptions* options = NULL;
1540 
1541    if ( !psi_options )
1542       return BLASTERR_INVALIDPARAM;
1543 
1544    options = (PSIBlastOptions*)calloc(1, sizeof(PSIBlastOptions));
1545    if ( !options )
1546        return BLASTERR_MEMORY;
1547 
1548    *psi_options = options;
1549    options->inclusion_ethresh = PSI_INCLUSION_ETHRESH;
1550    options->pseudo_count = PSI_PSEUDO_COUNT_CONST;
1551    options->use_best_alignment = TRUE;
1552 
1553    options->nsg_compatibility_mode = FALSE;
1554    options->impala_scaling_factor = kPSSM_NoImpalaScaling;
1555    options->ignore_unaligned_positions = FALSE;
1556 
1557    return 0;
1558 }
1559 
PSIBlastOptionsValidate(const PSIBlastOptions * psi_options,Blast_Message ** blast_msg)1560 Int2 PSIBlastOptionsValidate(const PSIBlastOptions* psi_options,
1561                              Blast_Message** blast_msg)
1562 {
1563     Int2 retval = 1;    /* assume failure */
1564 
1565     if ( !psi_options ) {
1566         return retval;
1567     }
1568 
1569     if (psi_options->pseudo_count < 0) {
1570         Blast_MessageWrite(blast_msg, eBlastSevError, kBlastMessageNoContext,
1571                            "Pseudo count must be greater than or equal to 0");
1572         return retval;
1573     }
1574 
1575     if (psi_options->inclusion_ethresh <= 0.0) {
1576         Blast_MessageWrite(blast_msg, eBlastSevError, kBlastMessageNoContext,
1577                            "Inclusion threshold must be greater than 0");
1578         return retval;
1579     }
1580 
1581     retval = 0;
1582     return retval;
1583 }
1584 
/** Releases a PSI-BLAST options structure.
 * @param psi_options Structure to release [in]
 * @return Always NULL.
 */
PSIBlastOptions* PSIBlastOptionsFree(PSIBlastOptions* psi_options)
{
   /* The structure is released as a single allocation; no member is freed
      separately. */
   sfree(psi_options);

   return NULL;
}
1590 
BlastDatabaseOptionsNew(BlastDatabaseOptions ** db_options)1591 Int2 BlastDatabaseOptionsNew(BlastDatabaseOptions** db_options)
1592 {
1593    BlastDatabaseOptions* options = NULL;
1594 
1595    if ( !db_options ) {
1596        return BLASTERR_INVALIDPARAM;
1597    }
1598 
1599    options = (BlastDatabaseOptions*) calloc(1, sizeof(BlastDatabaseOptions));
1600    if ( !options ) {
1601        return  BLASTERR_MEMORY;
1602    }
1603 
1604    options->genetic_code = BLAST_GENETIC_CODE;
1605    *db_options = options;
1606 
1607    return 0;
1608 }
1609 
1610 BlastDatabaseOptions*
BlastDatabaseOptionsFree(BlastDatabaseOptions * db_options)1611 BlastDatabaseOptionsFree(BlastDatabaseOptions* db_options)
1612 {
1613 
1614    if (db_options == NULL)
1615       return NULL;
1616 
1617    sfree(db_options);
1618    return NULL;
1619 }
1620 
BLAST_InitDefaultOptions(EBlastProgramType program_number,LookupTableOptions ** lookup_options,QuerySetUpOptions ** query_setup_options,BlastInitialWordOptions ** word_options,BlastExtensionOptions ** ext_options,BlastHitSavingOptions ** hit_options,BlastScoringOptions ** score_options,BlastEffectiveLengthsOptions ** eff_len_options,PSIBlastOptions ** psi_options,BlastDatabaseOptions ** db_options)1621 Int2 BLAST_InitDefaultOptions(EBlastProgramType program_number,
1622    LookupTableOptions** lookup_options,
1623    QuerySetUpOptions** query_setup_options,
1624    BlastInitialWordOptions** word_options,
1625    BlastExtensionOptions** ext_options,
1626    BlastHitSavingOptions** hit_options,
1627    BlastScoringOptions** score_options,
1628    BlastEffectiveLengthsOptions** eff_len_options,
1629    PSIBlastOptions** psi_options,
1630    BlastDatabaseOptions** db_options)
1631 {
1632    Int2 status;
1633 
1634    if ((status = LookupTableOptionsNew(program_number, lookup_options)))
1635       return status;
1636 
1637    if ((status=BlastQuerySetUpOptionsNew(query_setup_options)))
1638       return status;
1639 
1640    if ((status=BlastInitialWordOptionsNew(program_number, word_options)))
1641       return status;
1642 
1643    if ((status=BlastScoringOptionsNew(program_number, score_options)))
1644       return status;
1645 
1646    if ((status = BlastExtensionOptionsNew(program_number, ext_options,
1647                                        (*score_options)->gapped_calculation)))
1648       return status;
1649 
1650    if ((status=BlastHitSavingOptionsNew(program_number, hit_options,
1651                                         (*score_options)->gapped_calculation)))
1652       return status;
1653 
1654    if ((status=BlastEffectiveLengthsOptionsNew(eff_len_options)))
1655       return status;
1656 
1657    if ((status=PSIBlastOptionsNew(psi_options)))
1658       return status;
1659 
1660    if ((status=BlastDatabaseOptionsNew(db_options)))
1661       return status;
1662 
1663    return 0;
1664 
1665 }
1666 
1667 /**  Checks that the extension and scoring options are consistent with each other
1668  * @param program_number identifies the program [in]
1669  * @param ext_options the extension options [in]
1670  * @param score_options the scoring options [in]
1671  * @param blast_msg returns a message on errors. [in|out]
1672  * @return zero on success, an error code otherwise.
1673  */
s_BlastExtensionScoringOptionsValidate(EBlastProgramType program_number,const BlastExtensionOptions * ext_options,const BlastScoringOptions * score_options,Blast_Message ** blast_msg)1674 static Int2 s_BlastExtensionScoringOptionsValidate(EBlastProgramType program_number,
1675                            const BlastExtensionOptions* ext_options,
1676                            const BlastScoringOptions* score_options,
1677                            Blast_Message* *blast_msg)
1678 {
1679     if (ext_options == NULL || score_options == NULL)
1680         return BLASTERR_INVALIDPARAM;
1681 
1682     if (program_number == eBlastTypeBlastn)
1683     {
1684         if (score_options->gap_open == 0 && score_options->gap_extend == 0)
1685         {
1686             if (ext_options->ePrelimGapExt != eGreedyScoreOnly &&
1687                 ext_options->eTbackExt != eGreedyTbck)
1688                 {
1689                     Blast_MessageWrite(blast_msg, eBlastSevWarning,
1690                                        kBlastMessageNoContext,
1691                                        "Greedy extension must be used if gap existence and extension options are zero");
1692                     return BLASTERR_OPTION_VALUE_INVALID;
1693                 }
1694         }
1695     }
1696 
1697     if (program_number == eBlastTypeMapping) {
1698         if (ext_options->ePrelimGapExt != eJumperWithTraceback) {
1699 
1700             Blast_MessageWrite(blast_msg, eBlastSevWarning,
1701                                kBlastMessageNoContext,
1702                                "Jumper extension must be used for mapping");
1703 
1704             return BLASTERR_OPTION_VALUE_INVALID;
1705         }
1706     }
1707 
1708     if (ext_options->compositionBasedStats != eNoCompositionBasedStats)
1709     {
1710         if (!Blast_QueryIsPssm(program_number) && program_number != eBlastTypeTblastn &&
1711             program_number != eBlastTypeBlastp &&
1712             program_number != eBlastTypeBlastx &&
1713             program_number != eBlastTypeRpsBlast &&
1714             program_number != eBlastTypeRpsTblastn &&
1715             program_number != eBlastTypePsiBlast) {
1716 			Blast_MessageWrite(blast_msg, eBlastSevWarning, kBlastMessageNoContext,
1717                             "Compositional adjustments are only supported with blastp, blastx, or tblastn");
1718 			return BLASTERR_OPTION_VALUE_INVALID;
1719         }
1720         if (!score_options->gapped_calculation) {
1721 			Blast_MessageWrite(blast_msg, eBlastSevWarning, kBlastMessageNoContext,
1722                             "Compositional adjustments are only supported for gapped searches");
1723 			return BLASTERR_OPTION_VALUE_INVALID;
1724         }
1725 
1726     }
1727 
1728     return 0;
1729 }
1730 
1731 
BLAST_ValidateOptions(EBlastProgramType program_number,const BlastExtensionOptions * ext_options,const BlastScoringOptions * score_options,const LookupTableOptions * lookup_options,const BlastInitialWordOptions * word_options,const BlastHitSavingOptions * hit_options,Blast_Message ** blast_msg)1732 Int2 BLAST_ValidateOptions(EBlastProgramType program_number,
1733                            const BlastExtensionOptions* ext_options,
1734                            const BlastScoringOptions* score_options,
1735                            const LookupTableOptions* lookup_options,
1736                            const BlastInitialWordOptions* word_options,
1737                            const BlastHitSavingOptions* hit_options,
1738                            Blast_Message* *blast_msg)
1739 {
1740    Int2 status = 0;
1741 
1742    if ((status = BlastExtensionOptionsValidate(program_number, ext_options,
1743                                                blast_msg)) != 0)
1744        return status;
1745    if ((status = BlastScoringOptionsValidate(program_number, score_options,
1746                                                blast_msg)) != 0)
1747        return status;
1748    if ((status = LookupTableOptionsValidate(program_number,
1749                     lookup_options, blast_msg)) != 0)
1750        return status;
1751    if ((status = BlastInitialWordOptionsValidate(program_number,
1752                     word_options, blast_msg)) != 0)
1753        return status;
1754    if ((status = BlastHitSavingOptionsValidate(program_number, hit_options,
1755                                                blast_msg)) != 0)
1756        return status;
1757    if ((status = s_BlastExtensionScoringOptionsValidate(program_number, ext_options,
1758                                                score_options, blast_msg)) != 0)
1759        return status;
1760 
1761    /* Word sizes larger than 5 are not suported for IDENTITY scoring matrix.
1762     Identity matrix is only supported for blastp and tblastn. */
1763     if (program_number == eBlastTypeBlastp ||
1764         program_number == eBlastTypeTblastn) {
1765 
1766        char* matrix = BLAST_StrToUpper(score_options->matrix);
1767        Boolean is_identity = strcmp(matrix, "IDENTITY") == 0;
1768 
1769        if (matrix) {
1770            free(matrix);
1771        }
1772 
1773        if (lookup_options->word_size > 5 && is_identity) {
1774 
1775            Blast_MessageWrite(blast_msg, eBlastSevError,
1776                               kBlastMessageNoContext,
1777                               "Word size larger than 5 is not supported for "
1778                               "the identity scoring matrix");
1779 
1780            return BLASTERR_OPTION_VALUE_INVALID;
1781        }
1782    }
1783 
1784    return status;
1785 }
1786 
BlastHSPBestHitOptionsNew(double overhang,double score_edge)1787 BlastHSPBestHitOptions* BlastHSPBestHitOptionsNew(double overhang, double score_edge)
1788 {
1789     BlastHSPBestHitOptions* retval =
1790         (BlastHSPBestHitOptions*) calloc(1, sizeof(BlastHSPBestHitOptions));
1791     retval->overhang = overhang;
1792     retval->score_edge = score_edge;
1793     return retval;
1794 }
1795 
1796 Int2
BlastHSPBestHitOptionsValidate(const BlastHSPFilteringOptions * opts)1797 BlastHSPBestHitOptionsValidate(const BlastHSPFilteringOptions* opts)
1798 {
1799     Int2 retval = 0;    /* assume success */
1800     BlastHSPBestHitOptions* best_hit = opts->best_hit;
1801 
1802     if ( !best_hit ) {
1803         return retval;
1804     }
1805 
1806     if (best_hit->overhang <= kBestHit_OverhangMin ||
1807         best_hit->overhang >= kBestHit_OverhangMax) {
1808         return -1;
1809     }
1810 
1811     if (best_hit->score_edge <= kBestHit_ScoreEdgeMin ||
1812         best_hit->score_edge >= kBestHit_ScoreEdgeMax) {
1813         return -1;
1814     }
1815 
1816     return retval;
1817 }
1818 
/** Releases a best-hit options structure.
 * @param opt structure to release; may be NULL [in]
 * @return always NULL.
 */
BlastHSPBestHitOptions* BlastHSPBestHitOptionsFree(BlastHSPBestHitOptions* opt)
{
    if (opt != NULL) {
        sfree(opt);
    }
    return NULL;
}
1827 
BlastHSPCullingOptionsNew(int max)1828 BlastHSPCullingOptions* BlastHSPCullingOptionsNew(int max)
1829 {
1830     BlastHSPCullingOptions* retval =
1831         (BlastHSPCullingOptions*) calloc(1, sizeof(BlastHSPCullingOptions));
1832     retval->max_hits = max;
1833     return retval;
1834 }
1835 
1836 Int2
BlastHSPCullingOptionsValidate(const BlastHSPFilteringOptions * opts)1837 BlastHSPCullingOptionsValidate(const BlastHSPFilteringOptions* opts)
1838 {
1839     Int2 retval = 0;
1840     BlastHSPCullingOptions* culling_opts = opts->culling_opts;
1841     if (!culling_opts)
1842        return retval;
1843 
1844     if (culling_opts->max_hits < 0)
1845        return -1;
1846 
1847     return retval;
1848 }
1849 
1850 BlastHSPCullingOptions*
BlastHSPCullingOptionsFree(BlastHSPCullingOptions * culling_opts)1851 BlastHSPCullingOptionsFree(BlastHSPCullingOptions* culling_opts)
1852 {
1853    if (!culling_opts)
1854     return NULL;
1855 
1856    sfree(culling_opts);
1857    return NULL;
1858 }
1859 
1860 
BlastHSPFilteringOptionsNew()1861 BlastHSPFilteringOptions* BlastHSPFilteringOptionsNew()
1862 {
1863     return (BlastHSPFilteringOptions*)calloc(1,
1864                                              sizeof(BlastHSPFilteringOptions));
1865 }
1866 
1867 Int2
BlastHSPFilteringOptions_AddBestHit(BlastHSPFilteringOptions * filt_opts,BlastHSPBestHitOptions ** best_hit,EBlastStage stage)1868 BlastHSPFilteringOptions_AddBestHit(BlastHSPFilteringOptions* filt_opts,
1869                                     BlastHSPBestHitOptions** best_hit,
1870                                     EBlastStage stage)
1871 {
1872     if ( filt_opts == NULL || best_hit == NULL || *best_hit == NULL) {
1873         return 1;
1874     }
1875 
1876     filt_opts->best_hit = *best_hit;
1877     *best_hit = NULL;
1878     filt_opts->best_hit_stage = stage;
1879 
1880     return 0;
1881 }
1882 
1883 Int2
BlastHSPFilteringOptions_AddCulling(BlastHSPFilteringOptions * filt_opts,BlastHSPCullingOptions ** culling,EBlastStage stage)1884 BlastHSPFilteringOptions_AddCulling(BlastHSPFilteringOptions* filt_opts,
1885                                     BlastHSPCullingOptions** culling,
1886                                     EBlastStage stage)
1887 {
1888     if ( filt_opts == NULL || culling == NULL || *culling == NULL) {
1889         return 1;
1890     }
1891 
1892     filt_opts->culling_opts = *culling;
1893     *culling = NULL;
1894     filt_opts->culling_stage = stage;
1895 
1896     return 0;
1897 }
1898 
1899 Int2
BlastHSPFilteringOptionsValidate(const BlastHSPFilteringOptions * opts)1900 BlastHSPFilteringOptionsValidate(const BlastHSPFilteringOptions* opts)
1901 {
1902     Int2 retval = 0;    /* assume success */
1903     Boolean writer_found = FALSE;
1904 
1905     if ( (retval = BlastHSPBestHitOptionsValidate(opts)) != 0) {
1906         return retval;
1907     }
1908     if (opts->best_hit_stage & ePrelimSearch) {
1909         writer_found = TRUE;
1910     }
1911 
1912     if ( (retval = BlastHSPCullingOptionsValidate(opts)) != 0) {
1913         return retval;
1914     }
1915     if ((opts->culling_stage & ePrelimSearch) && writer_found) {
1916         return 1;
1917     }
1918 
1919     return retval;
1920 }
1921 
1922 BlastHSPFilteringOptions*
BlastHSPFilteringOptionsFree(BlastHSPFilteringOptions * opts)1923 BlastHSPFilteringOptionsFree(BlastHSPFilteringOptions* opts)
1924 {
1925     if ( !opts ) {
1926         return NULL;
1927     }
1928     opts->best_hit = BlastHSPBestHitOptionsFree(opts->best_hit);
1929     opts->culling_opts = BlastHSPCullingOptionsFree(opts->culling_opts);
1930     opts->subject_besthit_opts = BlastHSPSubjectBestHitOptionsFree(opts->subject_besthit_opts);
1931     sfree(opts);
1932     return opts;
1933 }
1934 
1935 BlastHSPSubjectBestHitOptions*
BlastHSPSubjectBestHitOptionsNew(Boolean isProtein)1936 BlastHSPSubjectBestHitOptionsNew(Boolean isProtein)
1937 {
1938     BlastHSPSubjectBestHitOptions* retval =
1939         (BlastHSPSubjectBestHitOptions*) calloc(1, sizeof(BlastHSPSubjectBestHitOptions));
1940     if(isProtein){
1941         retval->max_range_diff = DEFAULT_SUBJECT_BESTHIT_PROT_MAX_RANGE_DIFF;
1942     }
1943     else {
1944         retval->max_range_diff = DEFAULT_SUBJECT_BESTHIT_NUCL_MAX_RANGE_DIFF;
1945     }
1946     return retval;
1947 }
1948 
1949 Int2
BlastHSPSubjectBestHitOptionsValidate(const BlastHSPFilteringOptions * opts)1950 BlastHSPSubjectBestHitOptionsValidate(const BlastHSPFilteringOptions* opts)
1951 {
1952     Int2 retval = 0;
1953     BlastHSPSubjectBestHitOptions* besthit_opts = opts->subject_besthit_opts;
1954     if (!besthit_opts)
1955        return retval;
1956 
1957     return retval;
1958 }
1959 
1960 BlastHSPSubjectBestHitOptions*
BlastHSPSubjectBestHitOptionsFree(BlastHSPSubjectBestHitOptions * subject_besthit_opts)1961 BlastHSPSubjectBestHitOptionsFree(BlastHSPSubjectBestHitOptions* subject_besthit_opts)
1962 {
1963    if (!subject_besthit_opts)
1964     return NULL;
1965 
1966    sfree(subject_besthit_opts);
1967    return NULL;
1968 }
1969 
1970 Int2
BlastHSPFilteringOptions_AddSubjectBestHit(BlastHSPFilteringOptions * filt_opts,BlastHSPSubjectBestHitOptions ** subject_besthit)1971 BlastHSPFilteringOptions_AddSubjectBestHit(BlastHSPFilteringOptions* filt_opts,
1972                                            BlastHSPSubjectBestHitOptions** subject_besthit)
1973 {
1974     if ( filt_opts == NULL || subject_besthit == NULL || *subject_besthit == NULL) {
1975         return 1;
1976     }
1977 
1978     filt_opts->subject_besthit_opts = *subject_besthit;
1979     *subject_besthit = NULL;
1980 
1981     return 0;
1982 }
1983 
1984 
1985 
1986