1 /*  $Id: bl2seq.hpp 478658 2015-09-11 14:30:02Z madden $
2 * ===========================================================================
3 *
4 *                            PUBLIC DOMAIN NOTICE
5 *               National Center for Biotechnology Information
6 *
7 *  This software/database is a "United States Government Work" under the
8 *  terms of the United States Copyright Act.  It was written as part of
9 *  the author's official duties as a United States Government employee and
10 *  thus cannot be copyrighted.  This software/database is freely available
11 *  to the public for use. The National Library of Medicine and the U.S.
12 *  Government have not placed any restriction on its use or reproduction.
13 *
14 *  Although all reasonable efforts have been taken to ensure the accuracy
15 *  and reliability of the software and data, the NLM and the U.S.
16 *  Government do not and cannot warrant the performance or results that
17 *  may be obtained by using this software or data. The NLM and the U.S.
18 *  Government disclaim all warranties, express or implied, including
19 *  warranties of performance, merchantability or fitness for any particular
20 *  purpose.
21 *
22 *  Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author:  Christiam Camacho
27 *
28 */
29 
30 /// @file bl2seq.hpp
31 /// Declares the CBl2Seq (BLAST 2 Sequences) class
32 
33 #ifndef ALGO_BLAST_API___BL2SEQ__HPP
34 #define ALGO_BLAST_API___BL2SEQ__HPP
35 
36 #include <algo/blast/api/blast_types.hpp>
37 #include <algo/blast/api/sseqloc.hpp>
38 #include <algo/blast/api/blast_aux.hpp>
39 #include <algo/blast/api/blast_options_handle.hpp>
40 #include <algo/blast/api/blast_results.hpp>
41 #include <algo/blast/api/local_blast.hpp>
42 
43 /** @addtogroup AlgoBlast
44  *
45  * @{
46  */
47 
48 class CBlastFilterTest;  // forward-declared in the global namespace: CBl2Seq names it as a friend via ::CBlastFilterTest
49 
50 BEGIN_NCBI_SCOPE
51 BEGIN_SCOPE(blast)
52 
53 /// Runs the BLAST algorithm between 2 sequences.
54 /// @note this is a single-BLAST search run object (i.e.: it caches the results
55 /// after a BLAST search is done). If multiple BLAST searches with different
56 /// queries, subjects, or options are required, please create a separate object
57 class NCBI_XBLAST_EXPORT CBl2Seq : public CObject
58 {
59 public:
60 
61     /// Constructor to compare 2 sequences with default options
62     CBl2Seq(const SSeqLoc& query, const SSeqLoc& subject, EProgram p);
63 
64     /// Constructor to compare query against all subject sequences with
65     /// default options
66     /// @param dbscan_mode Database search mode (as opposed to pairwise)
67     CBl2Seq(const SSeqLoc& query, const TSeqLocVector& subjects, EProgram p,
68 	bool dbscan_mode=false);
69 
70     /// Constructor to allow query concatenation with default options
71     /// @param dbscan_mode Database search mode (as opposed to pairwise)
72     CBl2Seq(const TSeqLocVector& queries, const TSeqLocVector& subjects,
73             EProgram p, bool dbscan_mode=false);
74 
75     /// Constructor to compare 2 sequences with specified options
76     CBl2Seq(const SSeqLoc& query, const SSeqLoc& subject,
77             CBlastOptionsHandle& opts);
78 
79     /// Constructor to compare query against all subject sequences with
80     /// specified options
81     /// @param dbscan_mode Database search mode (as opposed to pairwise)
82     CBl2Seq(const SSeqLoc& query, const TSeqLocVector& subjects,
83             CBlastOptionsHandle& opts, bool dbscan_mode=false);
84 
85     /// Constructor to allow query concatenation with specified options
86     /// @param dbscan_mode Database search mode (as opposed to pairwise)
87     CBl2Seq(const TSeqLocVector& queries, const TSeqLocVector& subjects,
88             CBlastOptionsHandle& opts, bool dbscan_mode=false);
89 
90     /// Destructor
91     virtual ~CBl2Seq();
92 
93     /// Set the query.
94     void SetQuery(const SSeqLoc& query);
95 
96     /// Retrieve the query sequence.
97     const SSeqLoc& GetQuery() const;
98 
99     /// Set a vector of query sequences for a concatenated search.
100     void SetQueries(const TSeqLocVector& queries);
101 
102     /// Retrieve a vector of query sequences.
103     const TSeqLocVector& GetQueries() const;
104 
105     /// Set the subject sequence.
106     void SetSubject(const SSeqLoc& subject);
107 
108     /// Retrieve the subject sequence.
109     const SSeqLoc& GetSubject() const;
110 
111     /// Set a vector of subject sequences.
112     void SetSubjects(const TSeqLocVector& subjects);
113 
114     /// Retrieve a vector of subject sequences.
115     const TSeqLocVector& GetSubjects() const;
116 
117     /// Set the options handle.
118     CBlastOptionsHandle& SetOptionsHandle();
119 
120     /// Retrieve the options handle.
121     const CBlastOptionsHandle& GetOptionsHandle() const;
122 
123     /// Perform BLAST search
124     /// Assuming N queries and M subjects, the structure of the returned
125     /// vector is as follows, with types indicated in parenthesis:
126     /// TSeqAlignVector =
127     ///     [ {Results for query 1 and subject 1 (Seq-align-set)},
128     ///       {Results for query 1 and subject 2 (Seq-align-set)}, ...
129     ///       {Results for query 1 and subject M (Seq-align-set)},
130     ///       {Results for query 2 and subject 1 (Seq-align-set)},
131     ///       {Results for query 2 and subject 2 (Seq-align-set)}, ...
132     ///       {Results for query 2 and subject M (Seq-align-set)},
133     ///       {Results for query 3 and subject 1 (Seq-align-set)}, ...
134     ///       {Results for query N and subject M (Seq-align-set)} ]
135     virtual TSeqAlignVector Run();
136 
137     /// Performs the same functionality as Run(), but it returns a different
138     /// data type
139     /// @note the number of CSearchResultSet::value_type objects in this
140     /// function's return value will be (number of queries * number of
141     /// subjects)
142     CRef<CSearchResultSet> RunEx();
143 
144     /// Retrieves regions filtered on the query/queries
145     TSeqLocInfoVector GetFilteredQueryRegions() const;
146 
147     /// Retrieves regions filtered on the subject sequence(s)
148     /// @param retval the return value of this method [in|out]
149     void GetFilteredSubjectRegions(vector<TSeqLocInfoVector>& retval) const;
150 
151     /// Retrieves the diagnostics information returned from the engine
152     BlastDiagnostics* GetDiagnostics() const;
153 
154     /// Get the ancillary results for a BLAST search (to be used with the Run()
155     /// method)
156     /// @param retval the return value of this method [in|out]
157     void GetAncillaryResults(CSearchResultSet::TAncillaryVector& retval) const;
158 
159     /// Returns error messages/warnings.
160     void GetMessages(TSearchMessages& messages) const;
161 
162     /// Set a function callback to be invoked by the CORE of BLAST to allow
163     /// interrupting a BLAST search in progress.
164     /// @param fnptr pointer to callback function [in]
165     /// @param user_data user data to be attached to SBlastProgress structure
166     /// [in]
167     /// @return the previously set TInterruptFnPtr (NULL if none was
168     /// provided before)
169     TInterruptFnPtr SetInterruptCallback(TInterruptFnPtr fnptr,
170                                          void* user_data = NULL);
171 
172     /// Converts m_Results data member to a TSeqAlignVector
173     static TSeqAlignVector
174         CSearchResultSet2TSeqAlignVector(CRef<CSearchResultSet> res);
175 protected:
176     /// Populate the internal m_AncillaryData member
177     void x_BuildAncillaryData();
178 
179 private:
180     // Data members received from client code
181     TSeqLocVector        m_tQueries;         ///< query sequence(s)
182     TSeqLocVector        m_tSubjects;        ///< sequence(s) to BLAST against
183     CRef<CBlastOptionsHandle>  m_OptsHandle; ///< Blast options
184     CRef<CLocalBlast>    m_Blast;            ///< The actual BLAST instance
185     bool		m_DbScanMode;        ///< Scan like a databsase (as opposed to pairwise)
186 
187     /// Common initialization code for all c-tors
188     void x_Init(const TSeqLocVector& queries, const TSeqLocVector& subjs);
189     /// Common initialization of the CLocalBlast object
190     void x_InitCLocalBlast();
191 
192     /// Prohibit copy constructor
193     CBl2Seq(const CBl2Seq& rhs);
194     /// Prohibit assignment operator
195     CBl2Seq& operator=(const CBl2Seq& rhs);
196 
197     /// Stores any warnings emitted during query setup
198     TSearchMessages                     m_Messages;
199 
200     /************ Internal data structures (m_i = internal members)***********/
201     /// Return search statistics data
202     BlastDiagnostics*                   mi_pDiagnostics;
203 
204     /// Ancillary BLAST data
205     CSearchResultSet::TAncillaryVector  m_AncillaryData;
206 
207     /// CLocalBlast results
208     CRef<CSearchResultSet> m_Results;
209 
210     /// Interrupt callback
211     TInterruptFnPtr m_InterruptFnx;
212     /// Interrupt user datacallback
213     void* m_InterruptUserData;
214 
215     /// Clean up structures and results from any previous search
216     void x_ResetInternalDs();
217 
218     friend class ::CBlastFilterTest;
219 };
220 
221 inline void
SetQuery(const SSeqLoc & query)222 CBl2Seq::SetQuery(const SSeqLoc& query)
223 {
224     x_ResetInternalDs();
225     m_tQueries.clear();
226     m_tQueries.push_back(query);
227 }
228 
229 inline const SSeqLoc&
GetQuery() const230 CBl2Seq::GetQuery() const
231 {
232     return m_tQueries.front();
233 }
234 
235 inline void
SetQueries(const TSeqLocVector & queries)236 CBl2Seq::SetQueries(const TSeqLocVector& queries)
237 {
238     x_ResetInternalDs();
239     m_tQueries.clear();
240     m_tQueries = queries;
241 }
242 
243 inline const TSeqLocVector&
GetQueries() const244 CBl2Seq::GetQueries() const
245 {
246     return m_tQueries;
247 }
248 
249 inline void
SetSubject(const SSeqLoc & subject)250 CBl2Seq::SetSubject(const SSeqLoc& subject)
251 {
252     x_ResetInternalDs();
253     m_tSubjects.clear();
254     m_tSubjects.push_back(subject);
255 }
256 
257 inline const SSeqLoc&
GetSubject() const258 CBl2Seq::GetSubject() const
259 {
260     return m_tSubjects.front();
261 }
262 
263 inline void
SetSubjects(const TSeqLocVector & subjects)264 CBl2Seq::SetSubjects(const TSeqLocVector& subjects)
265 {
266     x_ResetInternalDs();
267     m_tSubjects.clear();
268     m_tSubjects = subjects;
269 }
270 
271 inline const TSeqLocVector&
GetSubjects() const272 CBl2Seq::GetSubjects() const
273 {
274     return m_tSubjects;
275 }
276 
277 inline CBlastOptionsHandle&
SetOptionsHandle()278 CBl2Seq::SetOptionsHandle()
279 {
280     x_ResetInternalDs();
281     return *m_OptsHandle;
282 }
283 
284 inline const CBlastOptionsHandle&
GetOptionsHandle() const285 CBl2Seq::GetOptionsHandle() const
286 {
287     return *m_OptsHandle;
288 }
289 
GetDiagnostics() const290 inline BlastDiagnostics* CBl2Seq::GetDiagnostics() const
291 {
292     return mi_pDiagnostics;
293 }
294 
295 inline void
GetMessages(TSearchMessages & messages) const296 CBl2Seq::GetMessages(TSearchMessages& messages) const
297 {
298     messages = m_Messages;
299 }
300 
301 inline TInterruptFnPtr
SetInterruptCallback(TInterruptFnPtr fnptr,void * user_data)302 CBl2Seq::SetInterruptCallback(TInterruptFnPtr fnptr, void* user_data)
303 {
304     TInterruptFnPtr tmp = m_InterruptFnx;
305     m_InterruptFnx = fnptr;
306     m_InterruptUserData = user_data;
307     return tmp;
308 }
309 
310 inline void
GetAncillaryResults(CSearchResultSet::TAncillaryVector & retval) const311 CBl2Seq::GetAncillaryResults(CSearchResultSet::TAncillaryVector& retval) const
312 {
313     retval = m_AncillaryData;
314 }
315 
316 END_SCOPE(blast)
317 END_NCBI_SCOPE
318 
319 /* @} */
320 
321 #endif  /* ALGO_BLAST_API___BL2SEQ__HPP */
322