1 /* ===========================================================================
2 *
3 * PUBLIC DOMAIN NOTICE
4 * National Center for Biotechnology Information
5 *
6 * This software/database is a "United States Government Work" under the
7 * terms of the United States Copyright Act. It was written as part of
8 * the author's official duties as a United States Government employee and
9 * thus cannot be copyrighted. This software/database is freely available
10 * to the public for use. The National Library of Medicine and the U.S.
11 * Government have not placed any restriction on its use or reproduction.
12 *
13 * Although all reasonable efforts have been taken to ensure the accuracy
14 * and reliability of the software and data, the NLM and the U.S.
15 * Government do not and cannot warrant the performance or results that
16 * may be obtained by using this software or data. The NLM and the U.S.
17 * Government disclaim all warranties, express or implied, including
18 * warranties of performance, merchantability or fitness for any particular
19 * purpose.
20 *
21 * Please cite the author in any work or product based on this material.
22 *
23 * ===========================================================================
24 *
25 * Author: Christiam Camacho
26 *
27 */
28
29 /** @file traceback_stage.cpp
30 * NOTE: This file contains work in progress and the APIs are likely to change,
31 * please do not rely on them until this notice is removed.
32 */
33
34 #include <ncbi_pch.hpp>
35 #include <algo/blast/api/traceback_stage.hpp>
36 #include <algo/blast/api/uniform_search.hpp> // for CSearchDatabase
37 #include <algo/blast/api/seqinfosrc_seqdb.hpp> // for CSeqDbSeqInfoSrc
38 #include <objtools/blast/seqdb_reader/seqdb.hpp> // for CSeqDb
39 #include <algo/blast/api/subj_ranges_set.hpp>
40
41 #include "blast_memento_priv.hpp"
42 #include "blast_seqalign.hpp"
43 #include "blast_aux_priv.hpp"
44 #include "psiblast_aux_priv.hpp"
45
46 // CORE BLAST includes
47 #include <algo/blast/core/blast_setup.h>
48 #include <algo/blast/core/blast_traceback.h>
49 #include <algo/blast/core/blast_hits.h>
50
51 /** @addtogroup AlgoBlast
52 *
53 * @{
54 */
55
56 BEGIN_NCBI_SCOPE
57 USING_SCOPE(objects);
BEGIN_SCOPE(blast)58 BEGIN_SCOPE(blast)
59
60 CBlastTracebackSearch::CBlastTracebackSearch(CRef<IQueryFactory> qf,
61 CRef<CBlastOptions> opts,
62 BlastSeqSrc * seqsrc,
63 CRef<IBlastSeqInfoSrc> seqinfosrc,
64 CRef<TBlastHSPStream> hsps,
65 CConstRef<objects::CPssmWithParameters> pssm)
66 : m_QueryFactory (qf),
67 m_Options (opts),
68 m_InternalData (new SInternalData),
69 m_OptsMemento (0),
70 m_SeqInfoSrc (seqinfosrc),
71 m_ResultType(eDatabaseSearch),
72 m_DBscanInfo(0)
73 {
74 x_Init(qf, opts, pssm, BlastSeqSrcGetName(seqsrc), hsps);
75 m_InternalData->m_SeqSrc.Reset(new TBlastSeqSrc(seqsrc, 0));
76 m_InternalData->m_FnInterrupt = NULL;
77 m_InternalData->m_ProgressMonitor.Reset(new CSBlastProgress(NULL));
78 }
79
CBlastTracebackSearch(CRef<IQueryFactory> qf,CRef<SInternalData> internal_data,CRef<CBlastOptions> opts,CRef<IBlastSeqInfoSrc> seqinfosrc,TSearchMessages & search_msgs)80 CBlastTracebackSearch::CBlastTracebackSearch(CRef<IQueryFactory> qf,
81 CRef<SInternalData> internal_data,
82 CRef<CBlastOptions> opts,
83 CRef<IBlastSeqInfoSrc> seqinfosrc,
84 TSearchMessages& search_msgs)
85 : m_QueryFactory (qf),
86 m_Options (opts),
87 m_InternalData (internal_data),
88 m_OptsMemento (opts->CreateSnapshot()),
89 m_Messages (search_msgs),
90 m_SeqInfoSrc (seqinfosrc),
91 m_ResultType(eDatabaseSearch),
92 m_DBscanInfo(0)
93 {
94 if (Blast_ProgramIsPhiBlast(opts->GetProgramType())) {
95 if (m_InternalData)
96 {
97 BlastDiagnostics* diag = m_InternalData->m_Diagnostics->GetPointer();
98 if (diag && diag->ungapped_stat)
99 {
100 CRef<SDatabaseScanData> dbscan_info(new SDatabaseScanData);;
101 dbscan_info->m_NumPatOccurInDB = (int) diag->ungapped_stat->lookup_hits;
102 SetDBScanInfo(dbscan_info);
103 }
104 }
105 }
106 }
107
~CBlastTracebackSearch()108 CBlastTracebackSearch::~CBlastTracebackSearch()
109 {
110 delete m_OptsMemento;
111 }
112
113 void
SetResultType(EResultType type)114 CBlastTracebackSearch::SetResultType(EResultType type)
115 {
116 m_ResultType = type;
117 }
118
119 void
SetDBScanInfo(CRef<SDatabaseScanData> dbscan_info)120 CBlastTracebackSearch::SetDBScanInfo(CRef<SDatabaseScanData> dbscan_info)
121 {
122 m_DBscanInfo = dbscan_info;
123 }
124
125 void
x_Init(CRef<IQueryFactory> qf,CRef<CBlastOptions> opts,CConstRef<objects::CPssmWithParameters> pssm,const string & dbname,CRef<TBlastHSPStream> hsps)126 CBlastTracebackSearch::x_Init(CRef<IQueryFactory> qf,
127 CRef<CBlastOptions> opts,
128 CConstRef<objects::CPssmWithParameters> pssm,
129 const string & dbname,
130 CRef<TBlastHSPStream> hsps)
131 {
132 opts->Validate();
133
134 // 1. Initialize the query data (borrow it from the factory)
135 CRef<ILocalQueryData> query_data(qf->MakeLocalQueryData(&*opts));
136 m_InternalData->m_Queries = query_data->GetSequenceBlk();
137 m_InternalData->m_QueryInfo = query_data->GetQueryInfo();
138
139 query_data->GetMessages(m_Messages);
140
141 // 2. Take care of any rps information
142 if (Blast_ProgramIsRpsBlast(opts->GetProgramType())) {
143 m_InternalData->m_RpsData =
144 CSetupFactory::CreateRpsStructures(dbname, opts);
145 }
146
147 // 3. Create the options memento
148 m_OptsMemento = opts->CreateSnapshot();
149
150 const bool kIsPhiBlast =
151 Blast_ProgramIsPhiBlast(m_OptsMemento->m_ProgramType) ? true : false;
152
153 // 4. Create the BlastScoreBlk
154 // Note: we don't need masked query regions, as these are generally created
155 // in the preliminary stage of the BLAST search
156 BlastSeqLoc* lookup_segments(0);
157 BlastScoreBlk* sbp =
158 CSetupFactory::CreateScoreBlock(m_OptsMemento, query_data,
159 kIsPhiBlast ? &lookup_segments : 0,
160 m_Messages, 0,
161 m_InternalData->m_RpsData);
162 m_InternalData->m_ScoreBlk.Reset
163 (new TBlastScoreBlk(sbp, BlastScoreBlkFree));
164 if (pssm.NotEmpty()) {
165 PsiBlastSetupScoreBlock(sbp, pssm, m_Messages, opts);
166 }
167
168 // N.B.: Only PHI BLAST pseudo lookup table is necessary
169 if (kIsPhiBlast) {
170 _ASSERT(lookup_segments);
171 _ASSERT(m_InternalData->m_RpsData == NULL);
172 CRef< CBlastSeqLocWrap > lookup_segments_wrap(
173 new CBlastSeqLocWrap( lookup_segments ) );
174 LookupTableWrap* lut =
175 CSetupFactory::CreateLookupTable(query_data, m_OptsMemento,
176 m_InternalData->m_ScoreBlk->GetPointer(), lookup_segments_wrap);
177 m_InternalData->m_LookupTable.Reset
178 (new TLookupTableWrap(lut, LookupTableWrapFree));
179 }
180
181 // 5. Create diagnostics
182 BlastDiagnostics* diags = CSetupFactory::CreateDiagnosticsStructure();
183 m_InternalData->m_Diagnostics.Reset
184 (new TBlastDiagnostics(diags, Blast_DiagnosticsFree));
185
186 // 6. Attach HSP stream
187 m_InternalData->m_HspStream.Reset(hsps);
188 }
189
190 CRef<CSearchResultSet>
Run()191 CBlastTracebackSearch::Run()
192 {
193 _ASSERT(m_OptsMemento);
194 SPHIPatternSearchBlk* phi_lookup_table(0);
195
196 // For PHI BLAST we need to pass the pattern search items structure to the
197 // traceback code
198 bool is_phi = !! Blast_ProgramIsPhiBlast(m_OptsMemento->m_ProgramType);
199
200 if (is_phi) {
201 _ASSERT(m_InternalData->m_LookupTable);
202 _ASSERT(m_DBscanInfo && m_DBscanInfo->m_NumPatOccurInDB !=
203 m_DBscanInfo->kNoPhiBlastPattern);
204 phi_lookup_table = (SPHIPatternSearchBlk*)
205 m_InternalData->m_LookupTable->GetPointer()->lut;
206 phi_lookup_table->num_patterns_db = m_DBscanInfo->m_NumPatOccurInDB;
207 }
208 else
209 {
210 m_InternalData->m_LookupTable.Reset(NULL);
211 }
212
213 // When dealing with PSI-BLAST iterations, we need to keep a larger
214 // alignment for the PSSM engine as to replicate blastpgp's behavior
215 int hitlist_size_backup = m_OptsMemento->m_HitSaveOpts->hitlist_size;
216 if (m_OptsMemento->m_ProgramType == eBlastTypePsiBlast ) {
217 SBlastHitsParameters* bhp = NULL;
218 SBlastHitsParametersNew(m_OptsMemento->m_HitSaveOpts,
219 m_OptsMemento->m_ExtnOpts,
220 m_OptsMemento->m_ScoringOpts,
221 &bhp);
222 m_OptsMemento->m_HitSaveOpts->hitlist_size = bhp->prelim_hitlist_size;
223 SBlastHitsParametersFree(bhp);
224 }
225
226 auto_ptr<CAutoEnvironmentVariable> omp_env;
227 if (m_NumThreads > kMinNumThreads) {
228 omp_env.reset(new CAutoEnvironmentVariable("OMP_WAIT_POLICY", "passive"));
229 }
230
231 BlastHSPResults * hsp_results(0);
232 int status =
233 Blast_RunTracebackSearchWithInterrupt(m_OptsMemento->m_ProgramType,
234 m_InternalData->m_Queries,
235 m_InternalData->m_QueryInfo,
236 m_InternalData->m_SeqSrc->GetPointer(),
237 m_OptsMemento->m_ScoringOpts,
238 m_OptsMemento->m_ExtnOpts,
239 m_OptsMemento->m_HitSaveOpts,
240 m_OptsMemento->m_EffLenOpts,
241 m_OptsMemento->m_DbOpts,
242 m_OptsMemento->m_PSIBlastOpts,
243 m_InternalData->m_ScoreBlk->GetPointer(),
244 m_InternalData->m_HspStream->GetPointer(),
245 m_InternalData->m_RpsData ?
246 (*m_InternalData->m_RpsData)() : 0,
247 phi_lookup_table,
248 & hsp_results,
249 m_InternalData->m_FnInterrupt,
250 m_InternalData->m_ProgressMonitor->Get(), m_NumThreads);
251 if (status) {
252 NCBI_THROW(CBlastException, eCoreBlastError, "Traceback failed");
253 }
254
255 // This is the data resulting from the traceback phase (before it is converted to ASN.1).
256 // We wrap it this way so it is released even if an exception is thrown below.
257 CRef< CStructWrapper<BlastHSPResults> > HspResults;
258 HspResults.Reset(WrapStruct(hsp_results, Blast_HSPResultsFree));
259
260 _ASSERT(m_SeqInfoSrc);
261 _ASSERT(m_QueryFactory);
262 m_OptsMemento->m_HitSaveOpts->hitlist_size = hitlist_size_backup;
263
264 CRef<ILocalQueryData> qdata = m_QueryFactory->MakeLocalQueryData(m_Options);
265
266 vector<TSeqLocInfoVector> subj_masks;
267 TSeqAlignVector aligns =
268 LocalBlastResults2SeqAlign(hsp_results,
269 *qdata,
270 *m_SeqInfoSrc,
271 m_OptsMemento->m_ProgramType,
272 m_Options->GetGappedMode(),
273 m_Options->GetOutOfFrameMode(),
274 subj_masks,
275 m_ResultType);
276
277 vector< CConstRef<CSeq_id> > query_ids;
278 query_ids.reserve(aligns.size());
279 for (size_t i = 0; i < qdata->GetNumQueries(); i++) {
280 query_ids.push_back(CConstRef<CSeq_id>(qdata->GetSeq_loc(i)->GetId()));
281 }
282
283 return BlastBuildSearchResultSet(query_ids,
284 m_InternalData->m_ScoreBlk->GetPointer(),
285 m_InternalData->m_QueryInfo,
286 m_OptsMemento->m_ProgramType,
287 aligns,
288 m_Messages,
289 subj_masks,
290 NULL,
291 m_ResultType);
292 }
293
294 BlastHSPResults*
RunSimple()295 CBlastTracebackSearch::RunSimple()
296 {
297 _ASSERT(m_OptsMemento);
298 SPHIPatternSearchBlk* phi_lookup_table(0);
299
300 // For PHI BLAST we need to pass the pattern search items structure to the
301 // traceback code
302 bool is_phi = !! Blast_ProgramIsPhiBlast(m_OptsMemento->m_ProgramType);
303
304 if (is_phi) {
305 _ASSERT(m_InternalData->m_LookupTable);
306 _ASSERT(m_DBscanInfo && m_DBscanInfo->m_NumPatOccurInDB !=
307 m_DBscanInfo->kNoPhiBlastPattern);
308 phi_lookup_table = (SPHIPatternSearchBlk*)
309 m_InternalData->m_LookupTable->GetPointer()->lut;
310 phi_lookup_table->num_patterns_db = m_DBscanInfo->m_NumPatOccurInDB;
311 }
312 else
313 {
314 m_InternalData->m_LookupTable.Reset(NULL);
315 }
316
317 // When dealing with PSI-BLAST iterations, we need to keep a larger
318 // alignment for the PSSM engine as to replicate blastpgp's behavior
319 if (m_OptsMemento->m_ProgramType == eBlastTypePsiBlast ) {
320 SBlastHitsParameters* bhp = NULL;
321 SBlastHitsParametersNew(m_OptsMemento->m_HitSaveOpts,
322 m_OptsMemento->m_ExtnOpts,
323 m_OptsMemento->m_ScoringOpts,
324 &bhp);
325 m_OptsMemento->m_HitSaveOpts->hitlist_size = bhp->prelim_hitlist_size;
326 SBlastHitsParametersFree(bhp);
327 }
328
329 auto_ptr<CAutoEnvironmentVariable> omp_env;
330 if (m_NumThreads > kMinNumThreads) {
331 omp_env.reset(new CAutoEnvironmentVariable("OMP_WAIT_POLICY", "passive"));
332 }
333
334 BlastHSPResults * hsp_results(0);
335 int status =
336 Blast_RunTracebackSearchWithInterrupt(m_OptsMemento->m_ProgramType,
337 m_InternalData->m_Queries,
338 m_InternalData->m_QueryInfo,
339 m_InternalData->m_SeqSrc->GetPointer(),
340 m_OptsMemento->m_ScoringOpts,
341 m_OptsMemento->m_ExtnOpts,
342 m_OptsMemento->m_HitSaveOpts,
343 m_OptsMemento->m_EffLenOpts,
344 m_OptsMemento->m_DbOpts,
345 m_OptsMemento->m_PSIBlastOpts,
346 m_InternalData->m_ScoreBlk->GetPointer(),
347 m_InternalData->m_HspStream->GetPointer(),
348 m_InternalData->m_RpsData ?
349 (*m_InternalData->m_RpsData)() : 0,
350 phi_lookup_table,
351 & hsp_results,
352 m_InternalData->m_FnInterrupt,
353 m_InternalData->m_ProgressMonitor->Get(), m_NumThreads);
354 if (status) {
355 NCBI_THROW(CBlastException, eCoreBlastError, "Traceback failed");
356 }
357 return hsp_results;
358 }
359
360 END_SCOPE(blast)
361 END_NCBI_SCOPE
362
363 /* @} */
364
365