1 /*  $Id: magicblast_options.cpp 577533 2019-01-04 22:49:43Z boratyng $
2  * ===========================================================================
3  *
4  *                            PUBLIC DOMAIN NOTICE
5  *               National Center for Biotechnology Information
6  *
7  *  This software/database is a "United States Government Work" under the
8  *  terms of the United States Copyright Act.  It was written as part of
9  *  the author's official duties as a United States Government employee and
10  *  thus cannot be copyrighted.  This software/database is freely available
11  *  to the public for use. The National Library of Medicine and the U.S.
12  *  Government have not placed any restriction on its use or reproduction.
13  *
14  *  Although all reasonable efforts have been taken to ensure the accuracy
15  *  and reliability of the software and data, the NLM and the U.S.
16  *  Government do not and cannot warrant the performance or results that
17  *  may be obtained by using this software or data. The NLM and the U.S.
18  *  Government disclaim all warranties, express or implied, including
19  *  warranties of performance, merchantability or fitness for any particular
20  *  purpose.
21  *
22  *  Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors:  Greg Boratyn
27  *
28  */
29 
/// @file magicblast_options.cpp
/// Implements the CMagicBlastOptionsHandle class.
32 
33 #include <ncbi_pch.hpp>
34 //#include <algo/blast/core/blast_encoding.h>
35 #include <algo/blast/api/magicblast_options.hpp>
36 //#include <objects/seqloc/Na_strand.hpp>
37 //#include "blast_setup.hpp"
38 
39 /** @addtogroup AlgoBlast
40  *
41  * @{
42  */
43 
44 
45 BEGIN_NCBI_SCOPE
BEGIN_SCOPE(blast)46 BEGIN_SCOPE(blast)
47 
48 CMagicBlastOptionsHandle::CMagicBlastOptionsHandle(EAPILocality locality)
49     : CBlastOptionsHandle(locality)
50 {
51     SetDefaults();
52 }
53 
54 
CMagicBlastOptionsHandle(CRef<CBlastOptions> opt)55 CMagicBlastOptionsHandle::CMagicBlastOptionsHandle(CRef<CBlastOptions> opt)
56     : CBlastOptionsHandle(opt)
57 {
58 }
59 
60 
61 void
SetDefaults()62 CMagicBlastOptionsHandle::SetDefaults()
63 {
64     m_Opts->SetDefaultsMode(true);
65     SetRNAToGenomeDefaults();
66     m_Opts->SetDefaultsMode(false);
67 }
68 
69 void
SetRNAToGenomeDefaults()70 CMagicBlastOptionsHandle::SetRNAToGenomeDefaults()
71 {
72     m_Opts->SetDefaultsMode(true);
73     m_Opts->SetProgram(eMapper);
74     SetLookupTableDefaults();
75     SetQueryOptionDefaults();
76     SetInitialWordOptionsDefaults();
77     SetGappedExtensionDefaults();
78     SetScoringOptionsDefaults();
79     SetHitSavingOptionsDefaults();
80     SetEffectiveLengthsOptionsDefaults();
81     SetSubjectSequenceOptionsDefaults();
82     m_Opts->SetDefaultsMode(false);
83 }
84 
85 void
SetRNAToRNADefaults()86 CMagicBlastOptionsHandle::SetRNAToRNADefaults()
87 {
88     m_Opts->SetDefaultsMode(true);
89     m_Opts->SetProgram(eMapper);
90     SetLookupTableDefaults();
91     SetQueryOptionDefaults();
92     SetInitialWordOptionsDefaults();
93     SetGappedExtensionDefaults();
94     SetScoringOptionsDefaults();
95     SetHitSavingOptionsDefaults();
96     SetEffectiveLengthsOptionsDefaults();
97     SetSubjectSequenceOptionsDefaults();
98 
99     SetMismatchPenalty(-4);
100     SetGapExtensionCost(4);
101     SetLookupDbFilter(false);
102     SetSpliceAlignments(false);
103     SetWordSize(30);
104 
105     m_Opts->SetDefaultsMode(false);
106 }
107 
108 
109 void
SetGenomeToGenomeDefaults()110 CMagicBlastOptionsHandle::SetGenomeToGenomeDefaults()
111 {
112     m_Opts->SetDefaultsMode(true);
113     m_Opts->SetProgram(eMapper);
114     SetLookupTableDefaults();
115     SetQueryOptionDefaults();
116     SetInitialWordOptionsDefaults();
117     SetGappedExtensionDefaults();
118     SetScoringOptionsDefaults();
119     SetHitSavingOptionsDefaults();
120     SetEffectiveLengthsOptionsDefaults();
121     SetSubjectSequenceOptionsDefaults();
122 
123     SetMismatchPenalty(-4);
124     SetGapExtensionCost(4);
125     SetLookupDbFilter(true);
126     SetSpliceAlignments(false);
127     SetWordSize(28);
128 
129     m_Opts->SetDefaultsMode(false);
130 }
131 
132 void
SetLookupTableDefaults()133 CMagicBlastOptionsHandle::SetLookupTableDefaults()
134 {
135     if (getenv("MAPPER_MB_LOOKUP")) {
136         m_Opts->SetLookupTableType(eMBLookupTable);
137     }
138     else {
139         m_Opts->SetLookupTableType(eNaHashLookupTable);
140     }
141     SetWordSize(BLAST_WORDSIZE_MAPPER);
142     m_Opts->SetWordThreshold(BLAST_WORD_THRESHOLD_BLASTN);
143     SetMaxDbWordCount(MAX_DB_WORD_COUNT_MAPPER);
144     SetLookupTableStride(0);
145 }
146 
147 
148 void
SetQueryOptionDefaults()149 CMagicBlastOptionsHandle::SetQueryOptionDefaults()
150 {
151     SetReadQualityFiltering(true);
152     m_Opts->SetDustFiltering(false);
153     // Masking is used for filtering out poor quality reads which are masked
154     // for the full length. Setting mask at hash to true prevents additional
155     // memory allocation for unmasked queries.
156     m_Opts->SetMaskAtHash(true);
157     m_Opts->SetStrandOption(objects::eNa_strand_both);
158     SetLookupDbFilter(true);
159     SetPaired(false);
160 }
161 
162 void
SetInitialWordOptionsDefaults()163 CMagicBlastOptionsHandle::SetInitialWordOptionsDefaults()
164 {
165 }
166 
167 void
SetGappedExtensionDefaults()168 CMagicBlastOptionsHandle::SetGappedExtensionDefaults()
169 {
170     m_Opts->SetGapExtnAlgorithm(eJumperWithTraceback);
171     m_Opts->SetMaxMismatches(5);
172     m_Opts->SetMismatchWindow(10);
173     SetSpliceAlignments(true);
174     // 0 means that the value will be set to max(-penalty, gap open +
175     // gap extend)
176     SetGapXDropoff(0);
177 }
178 
179 
180 void
SetScoringOptionsDefaults()181 CMagicBlastOptionsHandle::SetScoringOptionsDefaults()
182 {
183     m_Opts->SetMatrixName(NULL);
184     SetGapOpeningCost(BLAST_GAP_OPEN_MAPPER);
185     SetGapExtensionCost(BLAST_GAP_EXTN_MAPPER);
186     m_Opts->SetMatchReward(BLAST_REWARD_MAPPER);
187     SetMismatchPenalty(BLAST_PENALTY_MAPPER);
188     m_Opts->SetGappedMode();
189     m_Opts->SetComplexityAdjMode(false);
190 
191     // set out-of-frame options to invalid? values
192     m_Opts->SetOutOfFrameMode(false);
193     m_Opts->SetFrameShiftPenalty(INT2_MAX);
194 }
195 
196 void
SetHitSavingOptionsDefaults()197 CMagicBlastOptionsHandle::SetHitSavingOptionsDefaults()
198 {
199     m_Opts->SetHitlistSize(500);
200     m_Opts->SetEvalueThreshold(BLAST_EXPECT_VALUE);
201     m_Opts->SetPercentIdentity(0);
202     // set some default here, allow INT4MAX to mean infinity
203     m_Opts->SetMaxNumHspPerSequence(0);
204     m_Opts->SetMaxHspsPerSubject(0);
205     // cutoff zero means use adaptive score threshold that depends on query
206     // length
207     SetCutoffScore(0);
208     vector<double> coeffs = {0.0, 0.0};
209     SetCutoffScoreCoeffs(coeffs);
210     SetMaxEditDistance(INT4_MAX);
211     SetLongestIntronLength(500000);
212 
213     // do not compute each query's ungapped alignment score threshold to
214     // trigger gapped alignment
215     m_Opts->SetLowScorePerc(0.0);
216     m_Opts->SetQueryCovHspPerc(0);
217 }
218 
219 void
SetEffectiveLengthsOptionsDefaults()220 CMagicBlastOptionsHandle::SetEffectiveLengthsOptionsDefaults()
221 {
222     m_Opts->SetDbLength(0);
223     m_Opts->SetDbSeqNum(0);
224     m_Opts->SetEffectiveSearchSpace(0);
225 }
226 
227 void
SetSubjectSequenceOptionsDefaults()228 CMagicBlastOptionsHandle::SetSubjectSequenceOptionsDefaults()
229 {}
230 
231 END_SCOPE(blast)
232 END_NCBI_SCOPE
233 
234 
235 /* @} */
236