1 /* $Id: bl2seq.hpp 478658 2015-09-11 14:30:02Z madden $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Christiam Camacho
27 *
28 */
29
30 /// @file bl2seq.hpp
31 /// Declares the CBl2Seq (BLAST 2 Sequences) class
32
33 #ifndef ALGO_BLAST_API___BL2SEQ__HPP
34 #define ALGO_BLAST_API___BL2SEQ__HPP
35
36 #include <algo/blast/api/blast_types.hpp>
37 #include <algo/blast/api/sseqloc.hpp>
38 #include <algo/blast/api/blast_aux.hpp>
39 #include <algo/blast/api/blast_options_handle.hpp>
40 #include <algo/blast/api/blast_results.hpp>
41 #include <algo/blast/api/local_blast.hpp>
42
43 /** @addtogroup AlgoBlast
44 *
45 * @{
46 */
47
// Forward declaration at global scope: CBl2Seq names this class in its
// `friend class ::CBlastFilterTest;` declaration.
class CBlastFilterTest;
49
50 BEGIN_NCBI_SCOPE
51 BEGIN_SCOPE(blast)
52
/// Runs the BLAST algorithm between 2 sequences.
/// @note this is a single-BLAST search run object (i.e.: it caches the results
/// after a BLAST search is done). If multiple BLAST searches with different
/// queries, subjects, or options are required, please create a separate object
/// for each of them.
/// @note objects of this class cannot be copied: the copy constructor and the
/// assignment operator are declared private.
class NCBI_XBLAST_EXPORT CBl2Seq : public CObject
{
public:

    /// Constructor to compare 2 sequences with default options
    /// @param query query sequence [in]
    /// @param subject subject sequence [in]
    /// @param p program type (an EProgram value) [in]
    CBl2Seq(const SSeqLoc& query, const SSeqLoc& subject, EProgram p);

    /// Constructor to compare query against all subject sequences with
    /// default options
    /// @param query query sequence [in]
    /// @param subjects subject sequences [in]
    /// @param p program type (an EProgram value) [in]
    /// @param dbscan_mode Database search mode (as opposed to pairwise);
    /// defaults to false [in]
    CBl2Seq(const SSeqLoc& query, const TSeqLocVector& subjects, EProgram p,
            bool dbscan_mode=false);

    /// Constructor to allow query concatenation with default options
    /// @param queries query sequences [in]
    /// @param subjects subject sequences [in]
    /// @param p program type (an EProgram value) [in]
    /// @param dbscan_mode Database search mode (as opposed to pairwise);
    /// defaults to false [in]
    CBl2Seq(const TSeqLocVector& queries, const TSeqLocVector& subjects,
            EProgram p, bool dbscan_mode=false);

    /// Constructor to compare 2 sequences with specified options
    /// @param query query sequence [in]
    /// @param subject subject sequence [in]
    /// @param opts options handle to use for the search [in]
    CBl2Seq(const SSeqLoc& query, const SSeqLoc& subject,
            CBlastOptionsHandle& opts);

    /// Constructor to compare query against all subject sequences with
    /// specified options
    /// @param query query sequence [in]
    /// @param subjects subject sequences [in]
    /// @param opts options handle to use for the search [in]
    /// @param dbscan_mode Database search mode (as opposed to pairwise);
    /// defaults to false [in]
    CBl2Seq(const SSeqLoc& query, const TSeqLocVector& subjects,
            CBlastOptionsHandle& opts, bool dbscan_mode=false);

    /// Constructor to allow query concatenation with specified options
    /// @param queries query sequences [in]
    /// @param subjects subject sequences [in]
    /// @param opts options handle to use for the search [in]
    /// @param dbscan_mode Database search mode (as opposed to pairwise);
    /// defaults to false [in]
    CBl2Seq(const TSeqLocVector& queries, const TSeqLocVector& subjects,
            CBlastOptionsHandle& opts, bool dbscan_mode=false);

    /// Destructor
    virtual ~CBl2Seq();

    /// Set the query. Any previously stored queries are replaced by this
    /// single one, and x_ResetInternalDs() is invoked.
    /// @param query query sequence [in]
    void SetQuery(const SSeqLoc& query);

    /// Retrieve the query sequence (the first one stored).
    /// @note implemented with front() on the query vector, so at least one
    /// query must have been set before calling this.
    const SSeqLoc& GetQuery() const;

    /// Set a vector of query sequences for a concatenated search. Replaces
    /// the stored queries and invokes x_ResetInternalDs().
    /// @param queries query sequences [in]
    void SetQueries(const TSeqLocVector& queries);

    /// Retrieve a vector of query sequences.
    const TSeqLocVector& GetQueries() const;

    /// Set the subject sequence. Any previously stored subjects are replaced
    /// by this single one, and x_ResetInternalDs() is invoked.
    /// @param subject subject sequence [in]
    void SetSubject(const SSeqLoc& subject);

    /// Retrieve the subject sequence (the first one stored).
    /// @note implemented with front() on the subject vector, so at least one
    /// subject must have been set before calling this.
    const SSeqLoc& GetSubject() const;

    /// Set a vector of subject sequences. Replaces the stored subjects and
    /// invokes x_ResetInternalDs().
    /// @param subjects subject sequences [in]
    void SetSubjects(const TSeqLocVector& subjects);

    /// Retrieve a vector of subject sequences.
    const TSeqLocVector& GetSubjects() const;

    /// Retrieve a modifiable reference to the options handle.
    /// @note despite the "Set" prefix this takes no argument; it invokes
    /// x_ResetInternalDs() and then returns the handle for modification.
    CBlastOptionsHandle& SetOptionsHandle();

    /// Retrieve the options handle (read-only).
    const CBlastOptionsHandle& GetOptionsHandle() const;

    /// Perform BLAST search
    /// Assuming N queries and M subjects, the structure of the returned
    /// vector is as follows, with types indicated in parenthesis:
    /// TSeqAlignVector =
    ///     [ {Results for query 1 and subject 1 (Seq-align-set)},
    ///       {Results for query 1 and subject 2 (Seq-align-set)}, ...
    ///       {Results for query 1 and subject M (Seq-align-set)},
    ///       {Results for query 2 and subject 1 (Seq-align-set)},
    ///       {Results for query 2 and subject 2 (Seq-align-set)}, ...
    ///       {Results for query 2 and subject M (Seq-align-set)},
    ///       {Results for query 3 and subject 1 (Seq-align-set)}, ...
    ///       {Results for query N and subject M (Seq-align-set)} ]
    virtual TSeqAlignVector Run();

    /// Performs the same functionality as Run(), but it returns a different
    /// data type
    /// @note the number of CSearchResultSet::value_type objects in this
    /// function's return value will be (number of queries * number of
    /// subjects)
    CRef<CSearchResultSet> RunEx();

    /// Retrieves regions filtered on the query/queries
    TSeqLocInfoVector GetFilteredQueryRegions() const;

    /// Retrieves regions filtered on the subject sequence(s)
    /// @param retval the return value of this method [in|out]
    void GetFilteredSubjectRegions(vector<TSeqLocInfoVector>& retval) const;

    /// Retrieves the diagnostics information returned from the engine
    /// @return the stored BlastDiagnostics pointer (mi_pDiagnostics)
    BlastDiagnostics* GetDiagnostics() const;

    /// Get the ancillary results for a BLAST search (to be used with the Run()
    /// method)
    /// @param retval receives a copy of the stored ancillary data; its
    /// previous contents are overwritten [in|out]
    void GetAncillaryResults(CSearchResultSet::TAncillaryVector& retval) const;

    /// Returns error messages/warnings.
    /// @param messages receives a copy of the stored messages; its previous
    /// contents are overwritten [in|out]
    void GetMessages(TSearchMessages& messages) const;

    /// Set a function callback to be invoked by the CORE of BLAST to allow
    /// interrupting a BLAST search in progress.
    /// @param fnptr pointer to callback function [in]
    /// @param user_data user data to be attached to SBlastProgress structure
    /// [in]
    /// @return the previously set TInterruptFnPtr (NULL if none was
    /// provided before)
    TInterruptFnPtr SetInterruptCallback(TInterruptFnPtr fnptr,
                                         void* user_data = NULL);

    /// Converts a CSearchResultSet to a TSeqAlignVector
    /// @note this is a static method: it operates on its argument, not on
    /// the m_Results data member of a particular object.
    /// @param res search results to convert [in]
    static TSeqAlignVector
    CSearchResultSet2TSeqAlignVector(CRef<CSearchResultSet> res);
protected:
    /// Populate the internal m_AncillaryData member
    void x_BuildAncillaryData();

private:
    // Data members received from client code
    TSeqLocVector m_tQueries;               ///< query sequence(s)
    TSeqLocVector m_tSubjects;              ///< sequence(s) to BLAST against
    CRef<CBlastOptionsHandle> m_OptsHandle; ///< Blast options
    CRef<CLocalBlast> m_Blast;              ///< The actual BLAST instance
    bool m_DbScanMode; ///< Scan like a database (as opposed to pairwise)

    /// Common initialization code for all c-tors
    void x_Init(const TSeqLocVector& queries, const TSeqLocVector& subjs);
    /// Common initialization of the CLocalBlast object
    void x_InitCLocalBlast();

    /// Prohibit copy constructor
    CBl2Seq(const CBl2Seq& rhs);
    /// Prohibit assignment operator
    CBl2Seq& operator=(const CBl2Seq& rhs);

    /// Stores any warnings emitted during query setup
    TSearchMessages m_Messages;

    /************ Internal data structures (m_i = internal members)***********/
    /// Search statistics data (returned by GetDiagnostics())
    BlastDiagnostics* mi_pDiagnostics;

    /// Ancillary BLAST data
    CSearchResultSet::TAncillaryVector m_AncillaryData;

    /// CLocalBlast results
    CRef<CSearchResultSet> m_Results;

    /// Interrupt callback
    TInterruptFnPtr m_InterruptFnx;
    /// User data associated with the interrupt callback
    void* m_InterruptUserData;

    /// Clean up structures and results from any previous search
    void x_ResetInternalDs();

    friend class ::CBlastFilterTest;
};
220
221 inline void
SetQuery(const SSeqLoc & query)222 CBl2Seq::SetQuery(const SSeqLoc& query)
223 {
224 x_ResetInternalDs();
225 m_tQueries.clear();
226 m_tQueries.push_back(query);
227 }
228
229 inline const SSeqLoc&
GetQuery() const230 CBl2Seq::GetQuery() const
231 {
232 return m_tQueries.front();
233 }
234
235 inline void
SetQueries(const TSeqLocVector & queries)236 CBl2Seq::SetQueries(const TSeqLocVector& queries)
237 {
238 x_ResetInternalDs();
239 m_tQueries.clear();
240 m_tQueries = queries;
241 }
242
243 inline const TSeqLocVector&
GetQueries() const244 CBl2Seq::GetQueries() const
245 {
246 return m_tQueries;
247 }
248
249 inline void
SetSubject(const SSeqLoc & subject)250 CBl2Seq::SetSubject(const SSeqLoc& subject)
251 {
252 x_ResetInternalDs();
253 m_tSubjects.clear();
254 m_tSubjects.push_back(subject);
255 }
256
257 inline const SSeqLoc&
GetSubject() const258 CBl2Seq::GetSubject() const
259 {
260 return m_tSubjects.front();
261 }
262
263 inline void
SetSubjects(const TSeqLocVector & subjects)264 CBl2Seq::SetSubjects(const TSeqLocVector& subjects)
265 {
266 x_ResetInternalDs();
267 m_tSubjects.clear();
268 m_tSubjects = subjects;
269 }
270
271 inline const TSeqLocVector&
GetSubjects() const272 CBl2Seq::GetSubjects() const
273 {
274 return m_tSubjects;
275 }
276
277 inline CBlastOptionsHandle&
SetOptionsHandle()278 CBl2Seq::SetOptionsHandle()
279 {
280 x_ResetInternalDs();
281 return *m_OptsHandle;
282 }
283
284 inline const CBlastOptionsHandle&
GetOptionsHandle() const285 CBl2Seq::GetOptionsHandle() const
286 {
287 return *m_OptsHandle;
288 }
289
GetDiagnostics() const290 inline BlastDiagnostics* CBl2Seq::GetDiagnostics() const
291 {
292 return mi_pDiagnostics;
293 }
294
295 inline void
GetMessages(TSearchMessages & messages) const296 CBl2Seq::GetMessages(TSearchMessages& messages) const
297 {
298 messages = m_Messages;
299 }
300
301 inline TInterruptFnPtr
SetInterruptCallback(TInterruptFnPtr fnptr,void * user_data)302 CBl2Seq::SetInterruptCallback(TInterruptFnPtr fnptr, void* user_data)
303 {
304 TInterruptFnPtr tmp = m_InterruptFnx;
305 m_InterruptFnx = fnptr;
306 m_InterruptUserData = user_data;
307 return tmp;
308 }
309
310 inline void
GetAncillaryResults(CSearchResultSet::TAncillaryVector & retval) const311 CBl2Seq::GetAncillaryResults(CSearchResultSet::TAncillaryVector& retval) const
312 {
313 retval = m_AncillaryData;
314 }
315
316 END_SCOPE(blast)
317 END_NCBI_SCOPE
318
319 /* @} */
320
321 #endif /* ALGO_BLAST_API___BL2SEQ__HPP */
322