1 /* ===========================================================================
2  *
3  *                            PUBLIC DOMAIN NOTICE
4  *               National Center for Biotechnology Information
5  *
6  *  This software/database is a "United States Government Work" under the
7  *  terms of the United States Copyright Act.  It was written as part of
8  *  the author's official duties as a United States Government employee and
9  *  thus cannot be copyrighted.  This software/database is freely available
10  *  to the public for use. The National Library of Medicine and the U.S.
11  *  Government have not placed any restriction on its use or reproduction.
12  *
13  *  Although all reasonable efforts have been taken to ensure the accuracy
14  *  and reliability of the software and data, the NLM and the U.S.
15  *  Government do not and cannot warrant the performance or results that
16  *  may be obtained by using this software or data. The NLM and the U.S.
17  *  Government disclaim all warranties, express or implied, including
18  *  warranties of performance, merchantability or fitness for any particular
19  *  purpose.
20  *
21  *  Please cite the author in any work or product based on this material.
22  *
23  * ===========================================================================
24  *
25  * Author:  Christiam Camacho
26  *
27  */
28 
29 /** @file traceback_stage.cpp
30  * NOTE: This file contains work in progress and the APIs are likely to change,
31  * please do not rely on them until this notice is removed.
32  */
33 
34 #include <ncbi_pch.hpp>
35 #include <algo/blast/api/traceback_stage.hpp>
36 #include <algo/blast/api/uniform_search.hpp>    // for CSearchDatabase
37 #include <algo/blast/api/seqinfosrc_seqdb.hpp>  // for CSeqDbSeqInfoSrc
38 #include <objtools/blast/seqdb_reader/seqdb.hpp>     // for CSeqDb
39 #include <algo/blast/api/subj_ranges_set.hpp>
40 
41 #include "blast_memento_priv.hpp"
42 #include "blast_seqalign.hpp"
43 #include "blast_aux_priv.hpp"
44 #include "psiblast_aux_priv.hpp"
45 
46 // CORE BLAST includes
47 #include <algo/blast/core/blast_setup.h>
48 #include <algo/blast/core/blast_traceback.h>
49 #include <algo/blast/core/blast_hits.h>
50 
51 /** @addtogroup AlgoBlast
52  *
53  * @{
54  */
55 
56 BEGIN_NCBI_SCOPE
57 USING_SCOPE(objects);
BEGIN_SCOPE(blast)58 BEGIN_SCOPE(blast)
59 
60 CBlastTracebackSearch::CBlastTracebackSearch(CRef<IQueryFactory>   qf,
61                                              CRef<CBlastOptions>   opts,
62                                              BlastSeqSrc         * seqsrc,
63                                              CRef<IBlastSeqInfoSrc> seqinfosrc,
64                                              CRef<TBlastHSPStream> hsps,
65                                              CConstRef<objects::CPssmWithParameters> pssm)
66     : m_QueryFactory (qf),
67       m_Options      (opts),
68       m_InternalData (new SInternalData),
69       m_OptsMemento  (0),
70       m_SeqInfoSrc   (seqinfosrc),
71       m_ResultType(eDatabaseSearch),
72       m_DBscanInfo(0)
73 {
74     x_Init(qf, opts, pssm, BlastSeqSrcGetName(seqsrc), hsps);
75     m_InternalData->m_SeqSrc.Reset(new TBlastSeqSrc(seqsrc, 0));
76     m_InternalData->m_FnInterrupt = NULL;
77     m_InternalData->m_ProgressMonitor.Reset(new CSBlastProgress(NULL));
78 }
79 
CBlastTracebackSearch(CRef<IQueryFactory> qf,CRef<SInternalData> internal_data,CRef<CBlastOptions> opts,CRef<IBlastSeqInfoSrc> seqinfosrc,TSearchMessages & search_msgs)80 CBlastTracebackSearch::CBlastTracebackSearch(CRef<IQueryFactory> qf,
81                                              CRef<SInternalData> internal_data,
82                                              CRef<CBlastOptions>   opts,
83                                              CRef<IBlastSeqInfoSrc> seqinfosrc,
84                                              TSearchMessages& search_msgs)
85     : m_QueryFactory (qf),
86       m_Options      (opts),
87       m_InternalData (internal_data),
88       m_OptsMemento  (opts->CreateSnapshot()),
89       m_Messages     (search_msgs),
90       m_SeqInfoSrc   (seqinfosrc),
91       m_ResultType(eDatabaseSearch),
92       m_DBscanInfo(0)
93 {
94       if (Blast_ProgramIsPhiBlast(opts->GetProgramType())) {
95            if (m_InternalData)
96            {
97               BlastDiagnostics* diag = m_InternalData->m_Diagnostics->GetPointer();
98               if (diag && diag->ungapped_stat)
99               {
100                  CRef<SDatabaseScanData> dbscan_info(new SDatabaseScanData);;
101                  dbscan_info->m_NumPatOccurInDB = (int) diag->ungapped_stat->lookup_hits;
102                  SetDBScanInfo(dbscan_info);
103               }
104            }
105      }
106 }
107 
~CBlastTracebackSearch()108 CBlastTracebackSearch::~CBlastTracebackSearch()
109 {
110     delete m_OptsMemento;
111 }
112 
113 void
SetResultType(EResultType type)114 CBlastTracebackSearch::SetResultType(EResultType type)
115 {
116     m_ResultType = type;
117 }
118 
119 void
SetDBScanInfo(CRef<SDatabaseScanData> dbscan_info)120 CBlastTracebackSearch::SetDBScanInfo(CRef<SDatabaseScanData> dbscan_info)
121 {
122     m_DBscanInfo = dbscan_info;
123 }
124 
125 void
x_Init(CRef<IQueryFactory> qf,CRef<CBlastOptions> opts,CConstRef<objects::CPssmWithParameters> pssm,const string & dbname,CRef<TBlastHSPStream> hsps)126 CBlastTracebackSearch::x_Init(CRef<IQueryFactory>   qf,
127                               CRef<CBlastOptions>   opts,
128                               CConstRef<objects::CPssmWithParameters> pssm,
129                               const string        & dbname,
130                               CRef<TBlastHSPStream> hsps)
131 {
132     opts->Validate();
133 
134     // 1. Initialize the query data (borrow it from the factory)
135     CRef<ILocalQueryData> query_data(qf->MakeLocalQueryData(&*opts));
136     m_InternalData->m_Queries = query_data->GetSequenceBlk();
137     m_InternalData->m_QueryInfo = query_data->GetQueryInfo();
138 
139     query_data->GetMessages(m_Messages);
140 
141     // 2. Take care of any rps information
142     if (Blast_ProgramIsRpsBlast(opts->GetProgramType())) {
143         m_InternalData->m_RpsData =
144             CSetupFactory::CreateRpsStructures(dbname, opts);
145     }
146 
147     // 3. Create the options memento
148     m_OptsMemento = opts->CreateSnapshot();
149 
150     const bool kIsPhiBlast =
151 		Blast_ProgramIsPhiBlast(m_OptsMemento->m_ProgramType) ? true : false;
152 
153     // 4. Create the BlastScoreBlk
154     // Note: we don't need masked query regions, as these are generally created
155     // in the preliminary stage of the BLAST search
156     BlastSeqLoc* lookup_segments(0);
157     BlastScoreBlk* sbp =
158         CSetupFactory::CreateScoreBlock(m_OptsMemento, query_data,
159                                         kIsPhiBlast ? &lookup_segments : 0,
160                                         m_Messages, 0,
161                                         m_InternalData->m_RpsData);
162     m_InternalData->m_ScoreBlk.Reset
163         (new TBlastScoreBlk(sbp, BlastScoreBlkFree));
164     if (pssm.NotEmpty()) {
165         PsiBlastSetupScoreBlock(sbp, pssm, m_Messages, opts);
166     }
167 
168     // N.B.: Only PHI BLAST pseudo lookup table is necessary
169     if (kIsPhiBlast) {
170         _ASSERT(lookup_segments);
171         _ASSERT(m_InternalData->m_RpsData == NULL);
172         CRef< CBlastSeqLocWrap > lookup_segments_wrap(
173                 new CBlastSeqLocWrap( lookup_segments ) );
174         LookupTableWrap* lut =
175             CSetupFactory::CreateLookupTable(query_data, m_OptsMemento,
176                  m_InternalData->m_ScoreBlk->GetPointer(), lookup_segments_wrap);
177         m_InternalData->m_LookupTable.Reset
178             (new TLookupTableWrap(lut, LookupTableWrapFree));
179     }
180 
181     // 5. Create diagnostics
182     BlastDiagnostics* diags = CSetupFactory::CreateDiagnosticsStructure();
183     m_InternalData->m_Diagnostics.Reset
184         (new TBlastDiagnostics(diags, Blast_DiagnosticsFree));
185 
186     // 6. Attach HSP stream
187     m_InternalData->m_HspStream.Reset(hsps);
188 }
189 
190 CRef<CSearchResultSet>
Run()191 CBlastTracebackSearch::Run()
192 {
193     _ASSERT(m_OptsMemento);
194     SPHIPatternSearchBlk* phi_lookup_table(0);
195 
196     // For PHI BLAST we need to pass the pattern search items structure to the
197     // traceback code
198     bool is_phi = !! Blast_ProgramIsPhiBlast(m_OptsMemento->m_ProgramType);
199 
200     if (is_phi) {
201         _ASSERT(m_InternalData->m_LookupTable);
202         _ASSERT(m_DBscanInfo && m_DBscanInfo->m_NumPatOccurInDB !=
203                 m_DBscanInfo->kNoPhiBlastPattern);
204         phi_lookup_table = (SPHIPatternSearchBlk*)
205             m_InternalData->m_LookupTable->GetPointer()->lut;
206         phi_lookup_table->num_patterns_db = m_DBscanInfo->m_NumPatOccurInDB;
207     }
208     else
209     {
210         m_InternalData->m_LookupTable.Reset(NULL);
211     }
212 
213     // When dealing with PSI-BLAST iterations, we need to keep a larger
214     // alignment for the PSSM engine as to replicate blastpgp's behavior
215     int hitlist_size_backup = m_OptsMemento->m_HitSaveOpts->hitlist_size;
216     if (m_OptsMemento->m_ProgramType == eBlastTypePsiBlast ) {
217         SBlastHitsParameters* bhp = NULL;
218         SBlastHitsParametersNew(m_OptsMemento->m_HitSaveOpts,
219                                 m_OptsMemento->m_ExtnOpts,
220                                 m_OptsMemento->m_ScoringOpts,
221                                 &bhp);
222         m_OptsMemento->m_HitSaveOpts->hitlist_size = bhp->prelim_hitlist_size;
223         SBlastHitsParametersFree(bhp);
224     }
225 
226     auto_ptr<CAutoEnvironmentVariable> omp_env;
227     if (m_NumThreads > kMinNumThreads) {
228         omp_env.reset(new CAutoEnvironmentVariable("OMP_WAIT_POLICY", "passive"));
229     }
230 
231     BlastHSPResults * hsp_results(0);
232     int status =
233         Blast_RunTracebackSearchWithInterrupt(m_OptsMemento->m_ProgramType,
234                                  m_InternalData->m_Queries,
235                                  m_InternalData->m_QueryInfo,
236                                  m_InternalData->m_SeqSrc->GetPointer(),
237                                  m_OptsMemento->m_ScoringOpts,
238                                  m_OptsMemento->m_ExtnOpts,
239                                  m_OptsMemento->m_HitSaveOpts,
240                                  m_OptsMemento->m_EffLenOpts,
241                                  m_OptsMemento->m_DbOpts,
242                                  m_OptsMemento->m_PSIBlastOpts,
243                                  m_InternalData->m_ScoreBlk->GetPointer(),
244                                  m_InternalData->m_HspStream->GetPointer(),
245                                  m_InternalData->m_RpsData ?
246                                  (*m_InternalData->m_RpsData)() : 0,
247                                  phi_lookup_table,
248                                  & hsp_results,
249                                  m_InternalData->m_FnInterrupt,
250                                  m_InternalData->m_ProgressMonitor->Get(), m_NumThreads);
251     if (status) {
252         NCBI_THROW(CBlastException, eCoreBlastError, "Traceback failed");
253     }
254 
255     // This is the data resulting from the traceback phase (before it is converted to ASN.1).
256     // We wrap it this way so it is released even if an exception is thrown below.
257     CRef< CStructWrapper<BlastHSPResults> > HspResults;
258     HspResults.Reset(WrapStruct(hsp_results, Blast_HSPResultsFree));
259 
260     _ASSERT(m_SeqInfoSrc);
261     _ASSERT(m_QueryFactory);
262     m_OptsMemento->m_HitSaveOpts->hitlist_size = hitlist_size_backup;
263 
264     CRef<ILocalQueryData> qdata = m_QueryFactory->MakeLocalQueryData(m_Options);
265 
266     vector<TSeqLocInfoVector> subj_masks;
267     TSeqAlignVector aligns =
268         LocalBlastResults2SeqAlign(hsp_results,
269                                    *qdata,
270                                    *m_SeqInfoSrc,
271                                    m_OptsMemento->m_ProgramType,
272                                    m_Options->GetGappedMode(),
273                                    m_Options->GetOutOfFrameMode(),
274                                    subj_masks,
275                                    m_ResultType);
276 
277     vector< CConstRef<CSeq_id> > query_ids;
278     query_ids.reserve(aligns.size());
279     for (size_t i = 0; i < qdata->GetNumQueries(); i++) {
280         query_ids.push_back(CConstRef<CSeq_id>(qdata->GetSeq_loc(i)->GetId()));
281     }
282 
283     return BlastBuildSearchResultSet(query_ids,
284                                      m_InternalData->m_ScoreBlk->GetPointer(),
285                                      m_InternalData->m_QueryInfo,
286                                      m_OptsMemento->m_ProgramType,
287                                      aligns,
288                                      m_Messages,
289                                      subj_masks,
290                                      NULL,
291                                      m_ResultType);
292 }
293 
294 BlastHSPResults*
RunSimple()295 CBlastTracebackSearch::RunSimple()
296 {
297     _ASSERT(m_OptsMemento);
298     SPHIPatternSearchBlk* phi_lookup_table(0);
299 
300     // For PHI BLAST we need to pass the pattern search items structure to the
301     // traceback code
302     bool is_phi = !! Blast_ProgramIsPhiBlast(m_OptsMemento->m_ProgramType);
303 
304     if (is_phi) {
305         _ASSERT(m_InternalData->m_LookupTable);
306         _ASSERT(m_DBscanInfo && m_DBscanInfo->m_NumPatOccurInDB !=
307                 m_DBscanInfo->kNoPhiBlastPattern);
308         phi_lookup_table = (SPHIPatternSearchBlk*)
309             m_InternalData->m_LookupTable->GetPointer()->lut;
310         phi_lookup_table->num_patterns_db = m_DBscanInfo->m_NumPatOccurInDB;
311     }
312     else
313     {
314         m_InternalData->m_LookupTable.Reset(NULL);
315     }
316 
317     // When dealing with PSI-BLAST iterations, we need to keep a larger
318     // alignment for the PSSM engine as to replicate blastpgp's behavior
319     if (m_OptsMemento->m_ProgramType == eBlastTypePsiBlast ) {
320         SBlastHitsParameters* bhp = NULL;
321         SBlastHitsParametersNew(m_OptsMemento->m_HitSaveOpts,
322                                 m_OptsMemento->m_ExtnOpts,
323                                 m_OptsMemento->m_ScoringOpts,
324                                 &bhp);
325         m_OptsMemento->m_HitSaveOpts->hitlist_size = bhp->prelim_hitlist_size;
326         SBlastHitsParametersFree(bhp);
327     }
328 
329     auto_ptr<CAutoEnvironmentVariable> omp_env;
330     if (m_NumThreads > kMinNumThreads) {
331         omp_env.reset(new CAutoEnvironmentVariable("OMP_WAIT_POLICY", "passive"));
332     }
333 
334     BlastHSPResults * hsp_results(0);
335     int status =
336         Blast_RunTracebackSearchWithInterrupt(m_OptsMemento->m_ProgramType,
337                                  m_InternalData->m_Queries,
338                                  m_InternalData->m_QueryInfo,
339                                  m_InternalData->m_SeqSrc->GetPointer(),
340                                  m_OptsMemento->m_ScoringOpts,
341                                  m_OptsMemento->m_ExtnOpts,
342                                  m_OptsMemento->m_HitSaveOpts,
343                                  m_OptsMemento->m_EffLenOpts,
344                                  m_OptsMemento->m_DbOpts,
345                                  m_OptsMemento->m_PSIBlastOpts,
346                                  m_InternalData->m_ScoreBlk->GetPointer(),
347                                  m_InternalData->m_HspStream->GetPointer(),
348                                  m_InternalData->m_RpsData ?
349                                  (*m_InternalData->m_RpsData)() : 0,
350                                  phi_lookup_table,
351                                  & hsp_results,
352                                  m_InternalData->m_FnInterrupt,
353                                  m_InternalData->m_ProgressMonitor->Get(), m_NumThreads);
354     if (status) {
355         NCBI_THROW(CBlastException, eCoreBlastError, "Traceback failed");
356     }
357     return hsp_results;
358 }
359 
360 END_SCOPE(blast)
361 END_NCBI_SCOPE
362 
363 /* @} */
364 
365