1 /* $Id: bl2seq.cpp 526381 2017-02-02 14:59:08Z madden $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Christiam Camacho
27 *
28 * ===========================================================================
29 */
30
31 /// @file bl2seq.cpp
32 /// Implementation of CBl2Seq class.
33
34 #include <ncbi_pch.hpp>
35 #include <algo/blast/api/bl2seq.hpp>
36 #include "blast_objmgr_priv.hpp"
37 #include <algo/blast/api/objmgr_query_data.hpp>
38
39 /** @addtogroup AlgoBlast
40 *
41 * @{
42 */
43
44 BEGIN_NCBI_SCOPE
45 USING_SCOPE(objects);
BEGIN_SCOPE(blast)46 BEGIN_SCOPE(blast)
47
48 CBl2Seq::CBl2Seq(const SSeqLoc& query, const SSeqLoc& subject, EProgram p)
49 : m_DbScanMode(false), m_InterruptFnx(0), m_InterruptUserData(0)
50 {
51 TSeqLocVector queries;
52 TSeqLocVector subjects;
53 queries.push_back(query);
54 subjects.push_back(subject);
55
56 x_Init(queries, subjects);
57 m_OptsHandle.Reset(CBlastOptionsFactory::Create(p));
58 }
59
x_InitCLocalBlast()60 void CBl2Seq::x_InitCLocalBlast()
61 {
62 _ASSERT( !m_tQueries.empty() );
63 _ASSERT( !m_tSubjects.empty() );
64 _ASSERT( !m_OptsHandle.Empty() );
65 CRef<IQueryFactory> query_factory(new CObjMgr_QueryFactory(m_tQueries));
66 CRef<IQueryFactory> subject_factory(new CObjMgr_QueryFactory(m_tSubjects));
67 CRef<CLocalDbAdapter> db(new CLocalDbAdapter(subject_factory, m_OptsHandle, m_DbScanMode));
68 m_Blast.Reset(new CLocalBlast(query_factory, m_OptsHandle, db));
69 if (m_InterruptFnx != NULL) {
70 m_Blast->SetInterruptCallback(m_InterruptFnx, m_InterruptUserData);
71 }
72 // Set the hitlist size to the total number of subject sequences, to
73 // make sure that no hits are discarded (ported from CBl2Seq::SetupSearch
74 m_OptsHandle->SetHitlistSize((int) m_tSubjects.size());
75 }
76
CBl2Seq(const SSeqLoc & query,const SSeqLoc & subject,CBlastOptionsHandle & opts)77 CBl2Seq::CBl2Seq(const SSeqLoc& query, const SSeqLoc& subject,
78 CBlastOptionsHandle& opts)
79 : m_DbScanMode(false), m_InterruptFnx(0), m_InterruptUserData(0)
80 {
81 TSeqLocVector queries;
82 TSeqLocVector subjects;
83 queries.push_back(query);
84 subjects.push_back(subject);
85
86 x_Init(queries, subjects);
87 m_OptsHandle.Reset(&opts);
88 }
89
CBl2Seq(const SSeqLoc & query,const TSeqLocVector & subjects,EProgram p,bool dbscan_mode)90 CBl2Seq::CBl2Seq(const SSeqLoc& query, const TSeqLocVector& subjects,
91 EProgram p, bool dbscan_mode)
92 : m_DbScanMode(dbscan_mode), m_InterruptFnx(0), m_InterruptUserData(0)
93 {
94 TSeqLocVector queries;
95 queries.push_back(query);
96
97 x_Init(queries, subjects);
98 m_OptsHandle.Reset(CBlastOptionsFactory::Create(p));
99 }
100
CBl2Seq(const SSeqLoc & query,const TSeqLocVector & subjects,CBlastOptionsHandle & opts,bool dbscan_mode)101 CBl2Seq::CBl2Seq(const SSeqLoc& query, const TSeqLocVector& subjects,
102 CBlastOptionsHandle& opts, bool dbscan_mode)
103 : m_DbScanMode(dbscan_mode), m_InterruptFnx(0), m_InterruptUserData(0)
104 {
105 TSeqLocVector queries;
106 queries.push_back(query);
107
108 x_Init(queries, subjects);
109 m_OptsHandle.Reset(&opts);
110 }
111
CBl2Seq(const TSeqLocVector & queries,const TSeqLocVector & subjects,EProgram p,bool dbscan_mode)112 CBl2Seq::CBl2Seq(const TSeqLocVector& queries, const TSeqLocVector& subjects,
113 EProgram p, bool dbscan_mode)
114 : m_DbScanMode(dbscan_mode), m_InterruptFnx(0), m_InterruptUserData(0)
115 {
116 x_Init(queries, subjects);
117 m_OptsHandle.Reset(CBlastOptionsFactory::Create(p));
118 }
119
CBl2Seq(const TSeqLocVector & queries,const TSeqLocVector & subjects,CBlastOptionsHandle & opts,bool dbscan_mode)120 CBl2Seq::CBl2Seq(const TSeqLocVector& queries, const TSeqLocVector& subjects,
121 CBlastOptionsHandle& opts, bool dbscan_mode)
122 : m_DbScanMode(dbscan_mode), m_InterruptFnx(0), m_InterruptUserData(0)
123 {
124 x_Init(queries, subjects);
125 m_OptsHandle.Reset(&opts);
126 }
127
x_Init(const TSeqLocVector & queries,const TSeqLocVector & subjs)128 void CBl2Seq::x_Init(const TSeqLocVector& queries, const TSeqLocVector& subjs)
129 {
130 m_tQueries = queries;
131 m_tSubjects = subjs;
132 mi_pDiagnostics = NULL;
133 }
134
~CBl2Seq()135 CBl2Seq::~CBl2Seq()
136 {
137 x_ResetInternalDs();
138 }
139
140 void
x_ResetInternalDs()141 CBl2Seq::x_ResetInternalDs()
142 {
143 // should be changed if derived classes are created
144 m_Messages.clear();
145 mi_pDiagnostics = Blast_DiagnosticsFree(mi_pDiagnostics);
146 m_AncillaryData.clear();
147 m_Results.Reset();
148 }
149
150 extern CRef<CSeq_align_set> CreateEmptySeq_align_set();
151
152 TSeqAlignVector
CSearchResultSet2TSeqAlignVector(CRef<CSearchResultSet> res)153 CBl2Seq::CSearchResultSet2TSeqAlignVector(CRef<CSearchResultSet> res)
154 {
155 if (res.Empty()) {
156 return TSeqAlignVector();
157 }
158 TSeqAlignVector retval;
159 retval.reserve(res->GetNumResults());
160 ITERATE(CSearchResultSet, r, *res) {
161 CRef<CSeq_align_set> sa;
162 if ((*r)->HasAlignments()) {
163 sa.Reset(const_cast<CSeq_align_set*>(&*(*r)->GetSeqAlign()));
164 } else {
165 sa.Reset(CreateEmptySeq_align_set());
166 }
167 retval.push_back(sa);
168 }
169 return retval;
170 }
171
172 TSeqAlignVector
Run()173 CBl2Seq::Run()
174 {
175 if (m_Results.NotEmpty()) {
176 // return cached results from previous run
177 return CBl2Seq::CSearchResultSet2TSeqAlignVector(m_Results);
178 }
179
180 (void) RunEx();
181 x_BuildAncillaryData();
182 return CBl2Seq::CSearchResultSet2TSeqAlignVector(m_Results);
183 }
184
185 void
x_BuildAncillaryData()186 CBl2Seq::x_BuildAncillaryData()
187 {
188 m_AncillaryData.clear();
189 m_AncillaryData.reserve(m_Results->size());
190 ITERATE(CSearchResultSet, r, *m_Results) {
191 m_AncillaryData.push_back((*r)->GetAncillaryData());
192 }
193 }
194
195 CRef<CSearchResultSet>
RunEx()196 CBl2Seq::RunEx()
197 {
198 x_InitCLocalBlast();
199 if (m_Results.NotEmpty()) {
200 // return cached results from previous run
201 return m_Results;
202 }
203
204 //m_OptsHandle->GetOptions().DebugDumpText(cerr, "m_OptsHandle", 1);
205 _ASSERT(m_Blast.NotEmpty());
206 m_Results = m_Blast->Run();
207 m_Messages = m_Blast->GetSearchMessages();
208 if (m_Blast->m_InternalData.NotEmpty()) {
209 mi_pDiagnostics =
210 Blast_DiagnosticsCopy(m_Blast->m_InternalData->m_Diagnostics->GetPointer());
211 }
212 return m_Results;
213 }
214
215 TSeqLocInfoVector
GetFilteredQueryRegions() const216 CBl2Seq::GetFilteredQueryRegions() const
217 {
218 return m_Results->GetFilteredQueryRegions();
219 }
220
221 void
GetFilteredSubjectRegions(vector<TSeqLocInfoVector> & retval) const222 CBl2Seq::GetFilteredSubjectRegions(vector<TSeqLocInfoVector>& retval) const
223 {
224 retval.clear();
225 if (m_Results.Empty() || m_Results->empty()) {
226 return;
227 }
228 ITERATE(CSearchResultSet, res, *m_Results) {
229 TSeqLocInfoVector subj_masks;
230 (*res)->GetSubjectMasks(subj_masks);
231 retval.push_back(subj_masks);
232 }
233 }
234
235 END_SCOPE(blast)
236 END_NCBI_SCOPE
237
238 /* @} */
239