1 /*  $Id: build_archive.cpp 591152 2019-08-12 11:18:21Z fongah2 $
2 * ===========================================================================
3 *
4 *                            PUBLIC DOMAIN NOTICE
5 *               National Center for Biotechnology Information
6 *
7 *  This software/database is a "United States Government Work" under the
8 *  terms of the United States Copyright Act.  It was written as part of
9 *  the author's official duties as a United States Government employee and
10 *  thus cannot be copyrighted.  This software/database is freely available
11 *  to the public for use. The National Library of Medicine and the U.S.
12 *  Government have not placed any restriction on its use or reproduction.
13 *
14 *  Although all reasonable efforts have been taken to ensure the accuracy
15 *  and reliability of the software and data, the NLM and the U.S.
16 *  Government do not and cannot warrant the performance or results that
17 *  may be obtained by using this software or data. The NLM and the U.S.
18 *  Government disclaim all warranties, express or implied, including
19 *  warranties of performance, merchantability or fitness for any particular
20 *  purpose.
21 *
22 *  Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author:  Tom Madden
27 *
28 * ===========================================================================
29 */
30 
31 /// @file build_archive.cpp
32 /// Builds archive format from BLAST results.
33 
34 #include <ncbi_pch.hpp>
35 #include <corelib/ncbi_system.hpp>
36 #include <serial/iterator.hpp>
37 #include <algo/blast/format/build_archive.hpp>
38 #include <algo/blast/api/version.hpp>
39 #include <algo/blast/api/remote_blast.hpp>
40 #include <algo/blast/api/blast_options_builder.hpp>
41 #include <algo/blast/api/uniform_search.hpp>
42 #include <algo/blast/api/blast_results.hpp>
43 #include <algo/blast/api/search_strategy.hpp>
44 
45 #include <objects/seqalign/Seq_align_set.hpp>
46 #include <objects/seqalign/Seq_align.hpp>
47 #include <objects/blast/Blast4_ka_block.hpp>
48 #include <objects/blast/blast__.hpp>
49 #include <objects/blast/names.hpp>
50 
51 #if defined(NCBI_OS_UNIX)
52 #include <unistd.h>
53 #endif
54 
55 /** @addtogroup AlgoBlast
56  *
57  * @{
58  */
59 
60 BEGIN_NCBI_SCOPE
61 USING_SCOPE(objects);
62 BEGIN_SCOPE(blast)
63 
64 
65 static CRef<CBlast4_ka_block>
66 s_Convert_to_CBlast_ka_block(const Blast_KarlinBlk* kablk, bool gapped)
67 {
68     CRef<CBlast4_ka_block> retval(new CBlast4_ka_block);
69     if (kablk)
70     {
71        retval->SetLambda(kablk->Lambda);
72        retval->SetK(kablk->K);
73        retval->SetH(kablk->H);
74     } else {
75        retval->SetLambda(-1.0);
76        retval->SetK(-1.0);
77        retval->SetH(-1.0);
78     }
79     retval->SetGapped(gapped);
80     return retval;
81 }
82 
83 static CRef<objects::CBlast4_archive>
s_BuildArchiveAll(CRef<CExportStrategy> export_strategy,blast::CBlastOptionsHandle & options_handle,const CSearchResultSet & results)84 s_BuildArchiveAll(CRef<CExportStrategy>  export_strategy,
85                      blast::CBlastOptionsHandle& options_handle,
86                      const CSearchResultSet& results)
87 {
88         CRef<objects::CBlast4_archive> archive(new objects::CBlast4_archive());
89 
90         CRef<CBlast4_request> net_request = export_strategy->GetSearchStrategy();
91         CBlastVersion v;
92         net_request->SetIdent(v.Print());
93 
94         archive->SetRequest(*net_request);
95         bool isPsiblast = false;
96         if(net_request->CanGetBody() && net_request->GetBody().IsQueue_search())
97         {
98         	if(net_request->GetBody().GetQueue_search().GetService() == "psi")
99         		isPsiblast=true;
100         }
101 
102         CRef<CSeq_align_set> seqalign_set(new CSeq_align_set);
103          _ASSERT(seqalign_set.NotEmpty());
104 
105         CRef<objects::CBlast4_get_search_results_reply> net_results(new objects::CBlast4_get_search_results_reply());
106 
107         TSeqLocInfoVector mask_vector;
108 
109         list<CRef<CBlast4_mask> >& net_masks = net_results->SetMasks();
110 
111         bool first_time = true;
112         Int8 effective_search_space = 0;
113         Int8 length_adjustment = 0;
114         ITERATE(CSearchResultSet, result, results) {
115              CConstRef<CSeq_align_set> result_set =
116                         (*result)->GetSeqAlign();
117              if (result_set.NotEmpty() && !result_set->IsEmpty()) {
118                  seqalign_set->Set().insert(seqalign_set->Set().end(),
119                                                    result_set->Get().begin(),
120                                                    result_set->Get().end());
121              }
122              if (first_time)
123              {
124                     CRef<CBlastAncillaryData> ancill_data = (*result)->GetAncillaryData();
125                     list<CRef<CBlast4_ka_block> >& ka_list = net_results->SetKa_blocks();
126 
127                     ka_list.push_back(s_Convert_to_CBlast_ka_block(isPsiblast? ancill_data->GetPsiUngappedKarlinBlk():
128                     														   ancill_data->GetUngappedKarlinBlk(), false));
129                     ka_list.push_back(s_Convert_to_CBlast_ka_block(isPsiblast? ancill_data->GetPsiGappedKarlinBlk() :
130                     		    											   ancill_data->GetGappedKarlinBlk(), true));
131                     effective_search_space = ancill_data->GetSearchSpace();
132                     length_adjustment = ancill_data->GetLengthAdjustment();
133                     first_time = false;
134              }
135              TMaskedQueryRegions query_masks;
136              (*result)->GetMaskedQueryRegions(query_masks);
137              mask_vector.push_back(query_masks);
138         }
139         list<CRef<CBlast4_mask> > masks =
140                    CRemoteBlast::ConvertToRemoteMasks(mask_vector, options_handle.GetOptions().GetProgramType());
141         net_masks.insert(net_masks.end(), masks.begin(), masks.end());
142 
143         list<string>& search_stats = net_results->SetSearch_stats();
144         search_stats.push_back("Effective search space: " + NStr::Int8ToString(effective_search_space));
145         search_stats.push_back("Effective search space used: " + NStr::Int8ToString(effective_search_space));
146         search_stats.push_back("Length adjustment: " + NStr::Int8ToString(length_adjustment));
147 
148         net_results->SetAlignments(*seqalign_set);
149         archive->SetResults(*net_results);
150 
151         return archive;
152 }
153 
154 
155 CRef<objects::CBlast4_archive>
BlastBuildArchive(blast::IQueryFactory & queries,blast::CBlastOptionsHandle & options_handle,const CSearchResultSet & results,CRef<CSearchDatabase> search_db,unsigned int num_iters)156 BlastBuildArchive(blast::IQueryFactory& queries,
157                      blast::CBlastOptionsHandle& options_handle,
158                      const CSearchResultSet& results,
159                      CRef<CSearchDatabase>  search_db,
160                      unsigned int num_iters)
161 {
162         CRef<blast::IQueryFactory> iquery_ref(&queries);
163         CRef<blast::CBlastOptionsHandle> options_ref(&options_handle);
164        	CRef<CExportStrategy> export_strategy;
165         if(num_iters != 0)
166         	export_strategy.Reset(new CExportStrategy(iquery_ref, options_ref, search_db, kEmptyStr, num_iters));
167         else
168         	export_strategy.Reset(new CExportStrategy(iquery_ref, options_ref, search_db));
169 
170         CRef<objects::CBlast4_archive> archive = s_BuildArchiveAll(export_strategy, options_handle, results);
171         return archive;
172 
173 }
174 
175 CRef<objects::CBlast4_archive>
BlastBuildArchive(blast::IQueryFactory & queries,blast::CBlastOptionsHandle & options_handle,const CSearchResultSet & results,blast::IQueryFactory & subjects)176 BlastBuildArchive(blast::IQueryFactory& queries,
177                      blast::CBlastOptionsHandle& options_handle,
178                      const CSearchResultSet& results,
179                      blast::IQueryFactory& subjects)
180 {
181         CRef<blast::IQueryFactory> iquery_ref(&queries);
182         CRef<blast::IQueryFactory> isubject_ref(&subjects);
183         CRef<blast::CBlastOptionsHandle> options_ref(&options_handle);
184         CRef<CExportStrategy> export_strategy(new CExportStrategy(iquery_ref, options_ref, isubject_ref));
185 
186         return s_BuildArchiveAll(export_strategy, options_handle, results);
187 }
188 
189 CRef<objects::CBlast4_archive>
BlastBuildArchive(objects::CPssmWithParameters & pssm,blast::CBlastOptionsHandle & options_handle,const CSearchResultSet & results,CRef<CSearchDatabase> search_db,unsigned int num_iters)190 BlastBuildArchive(objects::CPssmWithParameters & pssm,
191                   blast::CBlastOptionsHandle& options_handle,
192                   const CSearchResultSet& results,
193                   CRef<CSearchDatabase>  search_db,
194                   unsigned int num_iters)
195 {
196         CRef<objects::CPssmWithParameters> pssm_ref(&pssm);
197         CRef<blast::CBlastOptionsHandle> options_ref(&options_handle);
198         CRef<CExportStrategy> export_strategy(new CExportStrategy(pssm_ref, options_ref, search_db, kEmptyStr, num_iters));
199 
200         CRef<objects::CBlast4_archive> archive = s_BuildArchiveAll(export_strategy, options_handle, results);
201         return archive;
202 
203 }
204 
205 END_SCOPE(blast)
206 END_NCBI_SCOPE
207 
208 /* @} */
209