1 #ifndef ALGO_BLAST_API___SEARCH_STRATEGY__HPP
2 #define ALGO_BLAST_API___SEARCH_STRATEGY__HPP
3 
4 /*  $Id: search_strategy.hpp 591152 2019-08-12 11:18:21Z fongah2 $
5  * ===========================================================================
6  *
7  *                            PUBLIC DOMAIN NOTICE
8  *               National Center for Biotechnology Information
9  *
10  *  This software/database is a "United States Government Work" under the
11  *  terms of the United States Copyright Act.  It was written as part of
12  *  the author's official duties as a United States Government employee and
13  *  thus cannot be copyrighted.  This software/database is freely available
14  *  to the public for use. The National Library of Medicine and the U.S.
15  *  Government have not placed any restriction on its use or reproduction.
16  *
17  *  Although all reasonable efforts have been taken to ensure the accuracy
18  *  and reliability of the software and data, the NLM and the U.S.
19  *  Government do not and cannot warrant the performance or results that
20  *  may be obtained by using this software or data. The NLM and the U.S.
21  *  Government disclaim all warranties, express or implied, including
22  *  warranties of performance, merchantability or fitness for any particular
23  *  purpose.
24  *
25  *  Please cite the author in any work or product based on this material.
26  *
27  * ===========================================================================
28  *
29  * Authors:  Tom Madden
30  *
31  */
32 
33 /// @file search_strategy.hpp
34 /// Declares the CImportStrategy and CExportStrategy
35 
36 #include <algo/blast/api/remote_blast.hpp>
37 #include <algo/blast/api/blast_options_handle.hpp>
38 #include <algo/blast/api/blast_options_builder.hpp>
39 #include <objects/blast/blast__.hpp>
40 
41 /** @addtogroup AlgoBlast
42  *
43  * @{
44  */
45 
46 BEGIN_NCBI_SCOPE
47 
48 BEGIN_SCOPE(objects)
49     /// forward declaration of ASN.1 object containing PSSM (scoremat.asn)
50     class CPssmWithParameters;
51     class CBioseq_set;
52     class CSeq_loc;
53     class CSeq_id;
54     class CSeq_align_set;
55 END_SCOPE(objects)
56 
57 BEGIN_SCOPE(blast)
58 
59 /// This is the "mutable" data for CImportStrategy.
60 struct CImportStrategyData {
61 
62     /// Has the struct been properly filled in?
63     bool valid;
64 
65     /// BLAST options.
66     CRef<blast::CBlastOptionsHandle> m_OptionsHandle;
67 
68     /// Filtering ID
69     int m_FilteringID;
70 
71     /// Range of query.
72     TSeqRange m_QueryRange;
73 
74     /// Task, such as megablast, blastn, blastp, etc.
75     string m_Task;
76 
77     unsigned int m_PsiNumOfIterations;
78 
79     /// Filtering key
80     string m_FilteringKey;
81 
82     /// Subject Masking Type
83     ESubjectMaskingType m_SubjectMaskingType;
84 
85     /// Constructor
CImportStrategyDataCImportStrategyData86     CImportStrategyData() {
87         valid = false;
88         m_OptionsHandle.Reset(0);
89         m_FilteringID = -1; // means uninitialized/unknown
90         m_QueryRange = TSeqRange::GetEmpty();
91         m_PsiNumOfIterations = 0;
92         m_FilteringKey = kEmptyStr; // means uninitialized/unknown
93         m_SubjectMaskingType = eNoSubjMasking; // means uninitialized/unknown
94     }
95 };
96 
97 
98 /// Class to return parts of the CBlast4_request, or data associated with
99 /// a CBlast4_request, such as options.
100 class NCBI_XBLAST_EXPORT CImportStrategy : public CObject
101 {
102 public:
103     /// Constructor, imports the CBlast4_request
104     CImportStrategy(CRef<objects::CBlast4_request> request,
105                     bool ignore_unsupported_options = false);
106 
107     /// Builds and returns the OptionsHandle
108     CRef<blast::CBlastOptionsHandle> GetOptionsHandle() ;
109 
110     /// Fetches task, such as "megablast", "blastn", etc.
111     string GetTask() ;
112 
113     /// Fetches service, such as psiblast, plain, megablast
114     string GetService() const;
115 
116     /// Fetches program, one of blastn, blastp, blastx, tblastn, tblastx
117     string GetProgram() const;
118 
119     /// Returns ident field from a Blast4-request
120     string GetCreatedBy() const;
121 
122     /// The start and stop on the query (if applicable)
123     TSeqRange GetQueryRange();
124 
125     /// The DB filter ID.
126     int GetDBFilteringID() ;
127 
128     /// The DB filter key.
129     string GetDBFilteringKey() ;
130 
131     /// Get Subject Masking Type
132     ESubjectMaskingType GetSubjectMaskingType();
133 
134     /// The queries either as Bioseq, seqloc, or pssm.
135     CRef<objects::CBlast4_queries> GetQueries();
136 
137     /// Returns the target sequences.  This is then a choice of a
138     /// database (for searches over a blast database) or as a
139     /// list of Bioseqs (for bl2seq type searches).
140     CRef<objects::CBlast4_subject> GetSubject();
141 
142     /// Options specific to blast searches (e.g, threshold, expect value).
143     /// @return the algorithm options or NULL if unavailable
144     objects::CBlast4_parameters* GetAlgoOptions();
145 
146     /// Options for controlling program execution and database filtering.
147     /// @return the program options or NULL if unavailable
148     objects::CBlast4_parameters* GetProgramOptions();
149 
150     /// Options for controlling formatting (psi blast iteration number also).
151     /// @return the web formatting options or NULL if unavailable
152     objects::CBlast4_parameters* GetWebFormatOptions();
153 
154     /// Get number of iteration for psi blast, return 0 if num of iterations not available
155     unsigned int GetPsiNumOfIterations();
156 
157     /// Return the BlastOptions builder used in this class
GetOptionsBuilder()158     CBlastOptionsBuilder& GetOptionsBuilder() {
159 
160         if (m_OptionsBuilder.get() == NULL) {
161             FetchData();
162         }
163         _ASSERT(m_OptionsBuilder.get() != NULL);
164         return *m_OptionsBuilder.get();
165     }
166 
167     // Get Tax IDs for filterting, set size 0 if no tax id found
168     set<int> GetTaxidList();
169 
170     // Get Negative Tax IDs for filterting, set size 0 if no negative tax id found
171     set<int> GetNegativeTaxidList();
172 
173 private:
174     /// Fills in CImportStrategyData and m_OptionsBuilder
175     void FetchData();
176 
177    void  x_GetProgramOptionIntegerList(EBlastOptIdx idx, list<int> & list);
178     auto_ptr<CImportStrategyData> m_Data;
179     CRef<objects::CBlast4_request> m_Request;
180     string m_Service;
181     auto_ptr<CBlastOptionsBuilder> m_OptionsBuilder;
182     /// ignore unsupported options when creating blast options builder object
183     bool m_IgnoreUnsupportedOptions;
184 
185     /// Prohibit copy constructor
186     CImportStrategy(const CImportStrategy& rhs);
187     /// Prohibit assignment operator
188     CImportStrategy& operator=(const CImportStrategy& rhs);
189 };
190 
191 
192 class NCBI_XBLAST_EXPORT CExportStrategy : public CObject
193 {
194 public:
195 	/// Construct search strategy with :-.
196     /// @param opts_handle Blast options handle
197 	///        (Note: only eRemote or eBoth mode are supported)
198     CExportStrategy(CRef<CBlastOptionsHandle>  	opts_handle,
199     			    const string & 				client_id = kEmptyStr);
200 
201     /// Construct search strategy with :-.
202     /// @param queries Queries corresponding to Seq-loc-list or Bioseq-set.
203     /// @param opts_handle Blast options handle.
204 	///        (Note: only eRemote or eBoth mode are supported)
205     /// @param db Database used for this search.
206     CExportStrategy(CRef<IQueryFactory>         query,
207                  	CRef<CBlastOptionsHandle>  	opts_handle,
208                  	CRef<CSearchDatabase> 		db,
209     			    const string & 				client_id = kEmptyStr,
210     			    unsigned int				psi_num_iterations = 0);
211 
212     /// Construct search strategy with :-.
213     /// @param queries Queries corresponding to Seq-loc-list or Bioseq-set.
214     /// @param opts_handle Blast options handle.
215 	///        (Note: only eRemote or eBoth mode are supported)
216     /// @param subjects Subject corresponding to Seq-loc-list or Bioseq-set.
217     CExportStrategy(CRef<IQueryFactory>       	query,
218                  	CRef<CBlastOptionsHandle> 	opts_handle,
219                  	CRef<IQueryFactory>       	subject,
220     			    const string & 				client_id = kEmptyStr);
221 
222     /// Construct search strategy with :-.
223     /// @param pssm Search matrix for a PSSM search.
224     /// @param opts_handle Blast options handle.
225 	///        (Note: only eRemote or eBoth mode are supported)
226     /// @param db Database used for this search.
227     CExportStrategy(CRef<CPssmWithParameters>	pssm,
228                  	CRef<CBlastOptionsHandle>   opts_handle,
229                  	CRef<CSearchDatabase> 		db,
230     			    const string & 				client_id = kEmptyStr,
231     			    unsigned int				psi_num_iterations = 0);
232 
233     // Return Search Strategy constructed by calling one of the constructors above
234     CRef<objects::CBlast4_request> GetSearchStrategy(void);
235 
236     // Export Search Strategy (Blast4-request) in ASN1 format
237     void ExportSearchStrategy_ASN1(CNcbiOstream* out);
238 
239 private:
240 	// Prohibit copy and assign constructors
241 	CExportStrategy(const CExportStrategy & );
242 	CExportStrategy & operator=(const CExportStrategy & );
243 
244 	void x_Process_BlastOptions(CRef<CBlastOptionsHandle> & opts_handle);
245 	void x_Process_Query(CRef<IQueryFactory> & query);
246 	void x_Process_Pssm(CRef<CPssmWithParameters> & pssm);
247 	void x_Process_SearchDb(CRef<CSearchDatabase> & db);
248 	void x_Process_Subject(CRef<IQueryFactory> & subject);
249 
250 	void x_AddParameterToProgramOptions(objects::CBlast4Field & field,
251 	                   	 	    		const int int_value);
252 	void x_AddParameterToProgramOptions(objects::CBlast4Field & field,
253 										const vector<int> & int_list);
254 	void x_AddParameterToProgramOptions(objects::CBlast4Field & field,
255 										const vector<Int8> & int_list);
256 	void x_AddParameterToProgramOptions(objects::CBlast4Field & field,
257                                         const string & str);
258 
259 	void x_AddPsiNumOfIterationsToFormatOptions(unsigned int num_iters);
260 
261 	CRef<CBlast4_queue_search_request>   	m_QueueSearchRequest;
262 	string									m_ClientId;
263 };
264 
265 END_SCOPE(blast)
266 END_NCBI_SCOPE
267 
268 /* @} */
269 
270 #endif  /* ALGO_BLAST_API___SEARCH_STRATEGY__HPP */
271