1 /*  $Id: bl2seq.cpp 526381 2017-02-02 14:59:08Z madden $
2  * ===========================================================================
3  *
4  *                            PUBLIC DOMAIN NOTICE
5  *               National Center for Biotechnology Information
6  *
7  *  This software/database is a "United States Government Work" under the
8  *  terms of the United States Copyright Act.  It was written as part of
9  *  the author's official duties as a United States Government employee and
10  *  thus cannot be copyrighted.  This software/database is freely available
11  *  to the public for use. The National Library of Medicine and the U.S.
12  *  Government have not placed any restriction on its use or reproduction.
13  *
14  *  Although all reasonable efforts have been taken to ensure the accuracy
15  *  and reliability of the software and data, the NLM and the U.S.
16  *  Government do not and cannot warrant the performance or results that
17  *  may be obtained by using this software or data. The NLM and the U.S.
18  *  Government disclaim all warranties, express or implied, including
19  *  warranties of performance, merchantability or fitness for any particular
20  *  purpose.
21  *
22  *  Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author:  Christiam Camacho
27  *
28  * ===========================================================================
29  */
30 
31 /// @file bl2seq.cpp
32 /// Implementation of CBl2Seq class.
33 
34 #include <ncbi_pch.hpp>
35 #include <algo/blast/api/bl2seq.hpp>
36 #include "blast_objmgr_priv.hpp"
37 #include <algo/blast/api/objmgr_query_data.hpp>
38 
39 /** @addtogroup AlgoBlast
40  *
41  * @{
42  */
43 
44 BEGIN_NCBI_SCOPE
45 USING_SCOPE(objects);
BEGIN_SCOPE(blast)46 BEGIN_SCOPE(blast)
47 
48 CBl2Seq::CBl2Seq(const SSeqLoc& query, const SSeqLoc& subject, EProgram p)
49     : m_DbScanMode(false), m_InterruptFnx(0), m_InterruptUserData(0)
50 {
51     TSeqLocVector queries;
52     TSeqLocVector subjects;
53     queries.push_back(query);
54     subjects.push_back(subject);
55 
56     x_Init(queries, subjects);
57     m_OptsHandle.Reset(CBlastOptionsFactory::Create(p));
58 }
59 
x_InitCLocalBlast()60 void CBl2Seq::x_InitCLocalBlast()
61 {
62     _ASSERT( !m_tQueries.empty() );
63     _ASSERT( !m_tSubjects.empty() );
64     _ASSERT( !m_OptsHandle.Empty() );
65     CRef<IQueryFactory> query_factory(new CObjMgr_QueryFactory(m_tQueries));
66     CRef<IQueryFactory> subject_factory(new CObjMgr_QueryFactory(m_tSubjects));
67     CRef<CLocalDbAdapter> db(new CLocalDbAdapter(subject_factory, m_OptsHandle, m_DbScanMode));
68     m_Blast.Reset(new CLocalBlast(query_factory, m_OptsHandle, db));
69     if (m_InterruptFnx != NULL) {
70         m_Blast->SetInterruptCallback(m_InterruptFnx, m_InterruptUserData);
71     }
72     // Set the hitlist size to the total number of subject sequences, to
73     // make sure that no hits are discarded (ported from CBl2Seq::SetupSearch
74     m_OptsHandle->SetHitlistSize((int) m_tSubjects.size());
75 }
76 
CBl2Seq(const SSeqLoc & query,const SSeqLoc & subject,CBlastOptionsHandle & opts)77 CBl2Seq::CBl2Seq(const SSeqLoc& query, const SSeqLoc& subject,
78                  CBlastOptionsHandle& opts)
79     : m_DbScanMode(false), m_InterruptFnx(0), m_InterruptUserData(0)
80 {
81     TSeqLocVector queries;
82     TSeqLocVector subjects;
83     queries.push_back(query);
84     subjects.push_back(subject);
85 
86     x_Init(queries, subjects);
87     m_OptsHandle.Reset(&opts);
88 }
89 
CBl2Seq(const SSeqLoc & query,const TSeqLocVector & subjects,EProgram p,bool dbscan_mode)90 CBl2Seq::CBl2Seq(const SSeqLoc& query, const TSeqLocVector& subjects,
91                  EProgram p, bool dbscan_mode)
92     : m_DbScanMode(dbscan_mode), m_InterruptFnx(0), m_InterruptUserData(0)
93 {
94     TSeqLocVector queries;
95     queries.push_back(query);
96 
97     x_Init(queries, subjects);
98     m_OptsHandle.Reset(CBlastOptionsFactory::Create(p));
99 }
100 
CBl2Seq(const SSeqLoc & query,const TSeqLocVector & subjects,CBlastOptionsHandle & opts,bool dbscan_mode)101 CBl2Seq::CBl2Seq(const SSeqLoc& query, const TSeqLocVector& subjects,
102                  CBlastOptionsHandle& opts, bool dbscan_mode)
103     : m_DbScanMode(dbscan_mode), m_InterruptFnx(0), m_InterruptUserData(0)
104 {
105     TSeqLocVector queries;
106     queries.push_back(query);
107 
108     x_Init(queries, subjects);
109     m_OptsHandle.Reset(&opts);
110 }
111 
CBl2Seq(const TSeqLocVector & queries,const TSeqLocVector & subjects,EProgram p,bool dbscan_mode)112 CBl2Seq::CBl2Seq(const TSeqLocVector& queries, const TSeqLocVector& subjects,
113                  EProgram p, bool dbscan_mode)
114     : m_DbScanMode(dbscan_mode), m_InterruptFnx(0), m_InterruptUserData(0)
115 {
116     x_Init(queries, subjects);
117     m_OptsHandle.Reset(CBlastOptionsFactory::Create(p));
118 }
119 
CBl2Seq(const TSeqLocVector & queries,const TSeqLocVector & subjects,CBlastOptionsHandle & opts,bool dbscan_mode)120 CBl2Seq::CBl2Seq(const TSeqLocVector& queries, const TSeqLocVector& subjects,
121                  CBlastOptionsHandle& opts, bool dbscan_mode)
122     : m_DbScanMode(dbscan_mode), m_InterruptFnx(0), m_InterruptUserData(0)
123 {
124     x_Init(queries, subjects);
125     m_OptsHandle.Reset(&opts);
126 }
127 
x_Init(const TSeqLocVector & queries,const TSeqLocVector & subjs)128 void CBl2Seq::x_Init(const TSeqLocVector& queries, const TSeqLocVector& subjs)
129 {
130     m_tQueries = queries;
131     m_tSubjects = subjs;
132     mi_pDiagnostics = NULL;
133 }
134 
~CBl2Seq()135 CBl2Seq::~CBl2Seq()
136 {
137     x_ResetInternalDs();
138 }
139 
140 void
x_ResetInternalDs()141 CBl2Seq::x_ResetInternalDs()
142 {
143     // should be changed if derived classes are created
144     m_Messages.clear();
145     mi_pDiagnostics = Blast_DiagnosticsFree(mi_pDiagnostics);
146     m_AncillaryData.clear();
147     m_Results.Reset();
148 }
149 
150 extern CRef<CSeq_align_set> CreateEmptySeq_align_set();
151 
152 TSeqAlignVector
CSearchResultSet2TSeqAlignVector(CRef<CSearchResultSet> res)153 CBl2Seq::CSearchResultSet2TSeqAlignVector(CRef<CSearchResultSet> res)
154 {
155     if (res.Empty()) {
156         return TSeqAlignVector();
157     }
158     TSeqAlignVector retval;
159     retval.reserve(res->GetNumResults());
160     ITERATE(CSearchResultSet, r, *res) {
161         CRef<CSeq_align_set> sa;
162         if ((*r)->HasAlignments()) {
163             sa.Reset(const_cast<CSeq_align_set*>(&*(*r)->GetSeqAlign()));
164         } else {
165             sa.Reset(CreateEmptySeq_align_set());
166         }
167         retval.push_back(sa);
168     }
169     return retval;
170 }
171 
172 TSeqAlignVector
Run()173 CBl2Seq::Run()
174 {
175     if (m_Results.NotEmpty()) {
176         // return cached results from previous run
177         return CBl2Seq::CSearchResultSet2TSeqAlignVector(m_Results);
178     }
179 
180     (void) RunEx();
181     x_BuildAncillaryData();
182     return CBl2Seq::CSearchResultSet2TSeqAlignVector(m_Results);
183 }
184 
185 void
x_BuildAncillaryData()186 CBl2Seq::x_BuildAncillaryData()
187 {
188     m_AncillaryData.clear();
189     m_AncillaryData.reserve(m_Results->size());
190     ITERATE(CSearchResultSet, r, *m_Results) {
191         m_AncillaryData.push_back((*r)->GetAncillaryData());
192     }
193 }
194 
195 CRef<CSearchResultSet>
RunEx()196 CBl2Seq::RunEx()
197 {
198     x_InitCLocalBlast();
199     if (m_Results.NotEmpty()) {
200         // return cached results from previous run
201         return m_Results;
202     }
203 
204     //m_OptsHandle->GetOptions().DebugDumpText(cerr, "m_OptsHandle", 1);
205     _ASSERT(m_Blast.NotEmpty());
206     m_Results = m_Blast->Run();
207     m_Messages = m_Blast->GetSearchMessages();
208     if (m_Blast->m_InternalData.NotEmpty()) {
209         mi_pDiagnostics =
210             Blast_DiagnosticsCopy(m_Blast->m_InternalData->m_Diagnostics->GetPointer());
211     }
212     return m_Results;
213 }
214 
215 TSeqLocInfoVector
GetFilteredQueryRegions() const216 CBl2Seq::GetFilteredQueryRegions() const
217 {
218     return m_Results->GetFilteredQueryRegions();
219 }
220 
221 void
GetFilteredSubjectRegions(vector<TSeqLocInfoVector> & retval) const222 CBl2Seq::GetFilteredSubjectRegions(vector<TSeqLocInfoVector>& retval) const
223 {
224     retval.clear();
225     if (m_Results.Empty() || m_Results->empty()) {
226         return;
227     }
228     ITERATE(CSearchResultSet, res, *m_Results) {
229         TSeqLocInfoVector subj_masks;
230         (*res)->GetSubjectMasks(subj_masks);
231         retval.push_back(subj_masks);
232     }
233 }
234 
235 END_SCOPE(blast)
236 END_NCBI_SCOPE
237 
238 /* @} */
239