1 /* $Id: build_archive.cpp 591152 2019-08-12 11:18:21Z fongah2 $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Tom Madden
27 *
28 * ===========================================================================
29 */
30
31 /// @file build_archive.cpp
32 /// Builds archive format from BLAST results.
33
34 #include <ncbi_pch.hpp>
35 #include <corelib/ncbi_system.hpp>
36 #include <serial/iterator.hpp>
37 #include <algo/blast/format/build_archive.hpp>
38 #include <algo/blast/api/version.hpp>
39 #include <algo/blast/api/remote_blast.hpp>
40 #include <algo/blast/api/blast_options_builder.hpp>
41 #include <algo/blast/api/uniform_search.hpp>
42 #include <algo/blast/api/blast_results.hpp>
43 #include <algo/blast/api/search_strategy.hpp>
44
45 #include <objects/seqalign/Seq_align_set.hpp>
46 #include <objects/seqalign/Seq_align.hpp>
47 #include <objects/blast/Blast4_ka_block.hpp>
48 #include <objects/blast/blast__.hpp>
49 #include <objects/blast/names.hpp>
50
51 #if defined(NCBI_OS_UNIX)
52 #include <unistd.h>
53 #endif
54
55 /** @addtogroup AlgoBlast
56 *
57 * @{
58 */
59
60 BEGIN_NCBI_SCOPE
61 USING_SCOPE(objects);
BEGIN_SCOPE(blast)62 BEGIN_SCOPE(blast)
63
64
65 static CRef<CBlast4_ka_block>
66 s_Convert_to_CBlast_ka_block(const Blast_KarlinBlk* kablk, bool gapped)
67 {
68 CRef<CBlast4_ka_block> retval(new CBlast4_ka_block);
69 if (kablk)
70 {
71 retval->SetLambda(kablk->Lambda);
72 retval->SetK(kablk->K);
73 retval->SetH(kablk->H);
74 } else {
75 retval->SetLambda(-1.0);
76 retval->SetK(-1.0);
77 retval->SetH(-1.0);
78 }
79 retval->SetGapped(gapped);
80 return retval;
81 }
82
83 static CRef<objects::CBlast4_archive>
s_BuildArchiveAll(CRef<CExportStrategy> export_strategy,blast::CBlastOptionsHandle & options_handle,const CSearchResultSet & results)84 s_BuildArchiveAll(CRef<CExportStrategy> export_strategy,
85 blast::CBlastOptionsHandle& options_handle,
86 const CSearchResultSet& results)
87 {
88 CRef<objects::CBlast4_archive> archive(new objects::CBlast4_archive());
89
90 CRef<CBlast4_request> net_request = export_strategy->GetSearchStrategy();
91 CBlastVersion v;
92 net_request->SetIdent(v.Print());
93
94 archive->SetRequest(*net_request);
95 bool isPsiblast = false;
96 if(net_request->CanGetBody() && net_request->GetBody().IsQueue_search())
97 {
98 if(net_request->GetBody().GetQueue_search().GetService() == "psi")
99 isPsiblast=true;
100 }
101
102 CRef<CSeq_align_set> seqalign_set(new CSeq_align_set);
103 _ASSERT(seqalign_set.NotEmpty());
104
105 CRef<objects::CBlast4_get_search_results_reply> net_results(new objects::CBlast4_get_search_results_reply());
106
107 TSeqLocInfoVector mask_vector;
108
109 list<CRef<CBlast4_mask> >& net_masks = net_results->SetMasks();
110
111 bool first_time = true;
112 Int8 effective_search_space = 0;
113 Int8 length_adjustment = 0;
114 ITERATE(CSearchResultSet, result, results) {
115 CConstRef<CSeq_align_set> result_set =
116 (*result)->GetSeqAlign();
117 if (result_set.NotEmpty() && !result_set->IsEmpty()) {
118 seqalign_set->Set().insert(seqalign_set->Set().end(),
119 result_set->Get().begin(),
120 result_set->Get().end());
121 }
122 if (first_time)
123 {
124 CRef<CBlastAncillaryData> ancill_data = (*result)->GetAncillaryData();
125 list<CRef<CBlast4_ka_block> >& ka_list = net_results->SetKa_blocks();
126
127 ka_list.push_back(s_Convert_to_CBlast_ka_block(isPsiblast? ancill_data->GetPsiUngappedKarlinBlk():
128 ancill_data->GetUngappedKarlinBlk(), false));
129 ka_list.push_back(s_Convert_to_CBlast_ka_block(isPsiblast? ancill_data->GetPsiGappedKarlinBlk() :
130 ancill_data->GetGappedKarlinBlk(), true));
131 effective_search_space = ancill_data->GetSearchSpace();
132 length_adjustment = ancill_data->GetLengthAdjustment();
133 first_time = false;
134 }
135 TMaskedQueryRegions query_masks;
136 (*result)->GetMaskedQueryRegions(query_masks);
137 mask_vector.push_back(query_masks);
138 }
139 list<CRef<CBlast4_mask> > masks =
140 CRemoteBlast::ConvertToRemoteMasks(mask_vector, options_handle.GetOptions().GetProgramType());
141 net_masks.insert(net_masks.end(), masks.begin(), masks.end());
142
143 list<string>& search_stats = net_results->SetSearch_stats();
144 search_stats.push_back("Effective search space: " + NStr::Int8ToString(effective_search_space));
145 search_stats.push_back("Effective search space used: " + NStr::Int8ToString(effective_search_space));
146 search_stats.push_back("Length adjustment: " + NStr::Int8ToString(length_adjustment));
147
148 net_results->SetAlignments(*seqalign_set);
149 archive->SetResults(*net_results);
150
151 return archive;
152 }
153
154
155 CRef<objects::CBlast4_archive>
BlastBuildArchive(blast::IQueryFactory & queries,blast::CBlastOptionsHandle & options_handle,const CSearchResultSet & results,CRef<CSearchDatabase> search_db,unsigned int num_iters)156 BlastBuildArchive(blast::IQueryFactory& queries,
157 blast::CBlastOptionsHandle& options_handle,
158 const CSearchResultSet& results,
159 CRef<CSearchDatabase> search_db,
160 unsigned int num_iters)
161 {
162 CRef<blast::IQueryFactory> iquery_ref(&queries);
163 CRef<blast::CBlastOptionsHandle> options_ref(&options_handle);
164 CRef<CExportStrategy> export_strategy;
165 if(num_iters != 0)
166 export_strategy.Reset(new CExportStrategy(iquery_ref, options_ref, search_db, kEmptyStr, num_iters));
167 else
168 export_strategy.Reset(new CExportStrategy(iquery_ref, options_ref, search_db));
169
170 CRef<objects::CBlast4_archive> archive = s_BuildArchiveAll(export_strategy, options_handle, results);
171 return archive;
172
173 }
174
175 CRef<objects::CBlast4_archive>
BlastBuildArchive(blast::IQueryFactory & queries,blast::CBlastOptionsHandle & options_handle,const CSearchResultSet & results,blast::IQueryFactory & subjects)176 BlastBuildArchive(blast::IQueryFactory& queries,
177 blast::CBlastOptionsHandle& options_handle,
178 const CSearchResultSet& results,
179 blast::IQueryFactory& subjects)
180 {
181 CRef<blast::IQueryFactory> iquery_ref(&queries);
182 CRef<blast::IQueryFactory> isubject_ref(&subjects);
183 CRef<blast::CBlastOptionsHandle> options_ref(&options_handle);
184 CRef<CExportStrategy> export_strategy(new CExportStrategy(iquery_ref, options_ref, isubject_ref));
185
186 return s_BuildArchiveAll(export_strategy, options_handle, results);
187 }
188
189 CRef<objects::CBlast4_archive>
BlastBuildArchive(objects::CPssmWithParameters & pssm,blast::CBlastOptionsHandle & options_handle,const CSearchResultSet & results,CRef<CSearchDatabase> search_db,unsigned int num_iters)190 BlastBuildArchive(objects::CPssmWithParameters & pssm,
191 blast::CBlastOptionsHandle& options_handle,
192 const CSearchResultSet& results,
193 CRef<CSearchDatabase> search_db,
194 unsigned int num_iters)
195 {
196 CRef<objects::CPssmWithParameters> pssm_ref(&pssm);
197 CRef<blast::CBlastOptionsHandle> options_ref(&options_handle);
198 CRef<CExportStrategy> export_strategy(new CExportStrategy(pssm_ref, options_ref, search_db, kEmptyStr, num_iters));
199
200 CRef<objects::CBlast4_archive> archive = s_BuildArchiveAll(export_strategy, options_handle, results);
201 return archive;
202
203 }
204
205 END_SCOPE(blast)
206 END_NCBI_SCOPE
207
208 /* @} */
209