1 /* $Id: blastn_app.cpp 632181 2021-05-27 13:23:25Z ivanov $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Authors: Christiam Camacho
27 *
28 */
29
30 /** @file blastn_app.cpp
31 * BLASTN command line application
32 */
33
34 #include <ncbi_pch.hpp>
35 #include <corelib/ncbiapp.hpp>
36 #include <algo/blast/api/local_blast.hpp>
37 #include <algo/blast/api/remote_blast.hpp>
38 #include <algo/blast/blastinput/blast_fasta_input.hpp>
39 #include <algo/blast/blastinput/blastn_args.hpp>
40 #include <algo/blast/api/objmgr_query_data.hpp>
41 #include <algo/blast/format/blast_format.hpp>
42 #include <util/profile/rtprofile.hpp>
43 #include "blast_app_util.hpp"
44 #include "blastn_node.hpp"
45
46 #ifndef SKIP_DOXYGEN_PROCESSING
47 USING_NCBI_SCOPE;
48 USING_SCOPE(blast);
49 USING_SCOPE(objects);
50 #endif
51
52 class CBlastnApp : public CNcbiApplication
53 {
54 public:
55 /** @inheritDoc */
CBlastnApp()56 CBlastnApp() {
57 CRef<CVersion> version(new CVersion());
58 version->SetVersionInfo(new CBlastVersion());
59 SetFullVersion(version);
60 m_StopWatch.Start();
61 if (m_UsageReport.IsEnabled()) {
62 m_UsageReport.AddParam(CBlastUsageReport::eVersion, GetVersion().Print());
63 }
64 }
65
~CBlastnApp()66 ~CBlastnApp() {
67 m_UsageReport.AddParam(CBlastUsageReport::eRunTime, m_StopWatch.Elapsed());
68 }
69 private:
70 /** @inheritDoc */
71 virtual void Init();
72 /** @inheritDoc */
73 virtual int Run();
74
75 int x_RunMTBySplitDB();
76 int x_RunMTBySplitQuery();
77
78 /// This application's command line args
79 CRef<CBlastnAppArgs> m_CmdLineArgs;
80 CBlastUsageReport m_UsageReport;
81 CStopWatch m_StopWatch;
82 };
83
Init()84 void CBlastnApp::Init()
85 {
86 // formulate command line arguments
87
88 m_CmdLineArgs.Reset(new CBlastnAppArgs());
89 // read the command line
90 HideStdArgs(fHideLogfile | fHideConffile | fHideFullVersion | fHideXmlHelp | fHideDryRun);
91 SetupArgDescriptions(m_CmdLineArgs->SetCommandLine());
92 }
93
Run(void)94 int CBlastnApp::Run(void)
95 {
96 const CArgs& args = GetArgs();
97 CMTArgs mt_args(args);
98 if ((mt_args.GetMTMode() == CMTArgs::eSplitByQueries) &&
99 (mt_args.GetNumThreads() > 1)){
100 m_UsageReport.AddParam(CBlastUsageReport::eMTMode, CMTArgs::eSplitByQueries);
101 return x_RunMTBySplitQuery();
102 }
103 else {
104 return x_RunMTBySplitDB();
105 }
106 }
107
x_RunMTBySplitDB()108 int CBlastnApp::x_RunMTBySplitDB()
109 {
110 BLAST_PROF_START( APP.MAIN );
111 BLAST_PROF_START( APP.PRE );
112 BLAST_PROF_ADD2( PROGRAM, blastn ) ;
113 int status = BLAST_EXIT_SUCCESS;
114 CBlastAppDiagHandler bah;
115 int batch_num = 0;
116
117 try {
118
119 // Allow the fasta reader to complain on invalid sequence input
120 SetDiagPostLevel(eDiag_Warning);
121 SetDiagPostPrefix("blastn");
122 SetDiagHandler(&bah, false);
123
124 /*** Get the BLAST options ***/
125 const CArgs& args = GetArgs();
126
127 CRef<CBlastOptionsHandle> opts_hndl;
128 if(RecoverSearchStrategy(args, m_CmdLineArgs)){
129 opts_hndl.Reset(&*m_CmdLineArgs->SetOptionsForSavedStrategy(args));
130 }
131 else {
132 opts_hndl.Reset(&*m_CmdLineArgs->SetOptions(args));
133 }
134 const CBlastOptions& opt = opts_hndl->GetOptions();
135
136 /*** Initialize the database/subject ***/
137 CRef<CBlastDatabaseArgs> db_args(m_CmdLineArgs->GetBlastDatabaseArgs());
138 CRef<CLocalDbAdapter> db_adapter;
139 CRef<CScope> scope;
140 InitializeSubject(db_args, opts_hndl, m_CmdLineArgs->ExecuteRemotely(),
141 db_adapter, scope);
142 _ASSERT(db_adapter && scope);
143
144 /*** Get the query sequence(s) ***/
145 CRef<CQueryOptionsArgs> query_opts =
146 m_CmdLineArgs->GetQueryOptionsArgs();
147
148 SDataLoaderConfig dlconfig =
149 InitializeQueryDataLoaderConfiguration(query_opts->QueryIsProtein(),
150 db_adapter);
151 CBlastInputSourceConfig iconfig(dlconfig, query_opts->GetStrand(),
152 query_opts->UseLowercaseMasks(),
153 query_opts->GetParseDeflines(),
154 query_opts->GetRange());
155 if(IsIStreamEmpty(m_CmdLineArgs->GetInputStream())) {
156 ERR_POST(Warning << "Query is Empty!");
157 return BLAST_EXIT_SUCCESS;
158 }
159 CBlastFastaInputSource fasta(m_CmdLineArgs->GetInputStream(), iconfig);
160 CBlastInput input(&fasta);
161
162 // Initialize the megablast database index now so we can know whether an indexed search will be run.
163 // This is only important for the reference in the report, but would be done anyway.
164 if (opt.GetUseIndex() && !m_CmdLineArgs->ExecuteRemotely()) {
165 CRef<CBlastOptions> my_options(&(opts_hndl->SetOptions()));
166 CSetupFactory::InitializeMegablastDbIndex(my_options);
167 }
168 /*** Get the formatting options ***/
169 CRef<CFormattingArgs> fmt_args(m_CmdLineArgs->GetFormattingArgs());
170 bool isArchiveFormat = fmt_args->ArchiveFormatRequested(args);
171 if(!isArchiveFormat) {
172 bah.DoNotSaveMessages();
173 }
174 CBlastFormat formatter(opt, *db_adapter,
175 fmt_args->GetFormattedOutputChoice(),
176 query_opts->GetParseDeflines(),
177 m_CmdLineArgs->GetOutputStream(),
178 fmt_args->GetNumDescriptions(),
179 fmt_args->GetNumAlignments(),
180 *scope,
181 opt.GetMatrixName(),
182 fmt_args->ShowGis(),
183 fmt_args->DisplayHtmlOutput(),
184 opt.GetQueryGeneticCode(),
185 opt.GetDbGeneticCode(),
186 opt.GetSumStatisticsMode(),
187 m_CmdLineArgs->ExecuteRemotely(),
188 db_adapter->GetFilteringAlgorithm(),
189 fmt_args->GetCustomOutputFormatSpec(),
190 m_CmdLineArgs->GetTask() == "megablast",
191 opt.GetMBIndexLoaded(),
192 NULL, NULL,
193 GetCmdlineArgs(GetArguments()),
194 GetSubjectFile(args));
195
196 formatter.SetQueryRange(query_opts->GetRange());
197 formatter.SetLineLength(fmt_args->GetLineLength());
198 formatter.SetHitsSortOption(fmt_args->GetHitsSortOption());
199 formatter.SetHspsSortOption(fmt_args->GetHspsSortOption());
200 formatter.SetCustomDelimiter(fmt_args->GetCustomDelimiter());
201 if(UseXInclude(*fmt_args, args[kArgOutput].AsString())) {
202 formatter.SetBaseFile(args[kArgOutput].AsString());
203 }
204 formatter.PrintProlog();
205
206 /*** Process the input ***/
207 CBatchSizeMixer mixer(SplitQuery_GetChunkSize(opt.GetProgram())-1000);
208 int batch_size = m_CmdLineArgs->GetQueryBatchSize();
209 if (batch_size) {
210 input.SetBatchSize(batch_size);
211 BLAST_PROF_ADD( BATCH_SIZE, (int)batch_size );
212 } else {
213 Int8 total_len = formatter.GetDbTotalLength();
214 if (total_len > 0) {
215 /* the optimal hits per batch scales with total db size */
216 mixer.SetTargetHits(total_len / 3000);
217 }
218 input.SetBatchSize(mixer.GetBatchSize());
219 BLAST_PROF_ADD( BATCH_SIZE, (int)mixer.GetBatchSize() );
220 }
221 BLAST_PROF_STOP( APP.PRE );
222 for (; !input.End(); formatter.ResetScopeHistory(), QueryBatchCleanup() ) {
223 BLAST_PROF_START( APP.LOOP.PRE );
224 CRef<CBlastQueryVector> query_batch(input.GetNextSeqBatch(*scope));
225 CRef<IQueryFactory> queries(new CObjMgr_QueryFactory(*query_batch));
226
227 SaveSearchStrategy(args, m_CmdLineArgs, queries, opts_hndl);
228
229 CRef<CSearchResultSet> results;
230
231 BLAST_PROF_STOP( APP.LOOP.PRE );
232 if (m_CmdLineArgs->ExecuteRemotely()) {
233 CRef<CRemoteBlast> rmt_blast =
234 InitializeRemoteBlast(queries, db_args, opts_hndl,
235 m_CmdLineArgs->ProduceDebugRemoteOutput(),
236 m_CmdLineArgs->GetClientId());
237 results = rmt_blast->GetResultSet();
238 } else {
239 BLAST_PROF_START( APP.LOOP.BLAST );
240 CLocalBlast lcl_blast(queries, opts_hndl, db_adapter);
241 lcl_blast.SetNumberOfThreads(m_CmdLineArgs->GetNumThreads());
242 lcl_blast.SetBatchNumber( batch_num );
243 results = lcl_blast.Run();
244 if (!batch_size)
245 input.SetBatchSize(mixer.GetBatchSize(lcl_blast.GetNumExtensions()));
246 BLAST_PROF_STOP( APP.LOOP.BLAST );
247 }
248 BLAST_PROF_START( APP.LOOP.FMT );
249 if (isArchiveFormat) {
250 formatter.WriteArchive(*queries, *opts_hndl, *results, 0, bah.GetMessages());
251 bah.ResetMessages();
252 } else {
253 BlastFormatter_PreFetchSequenceData(*results, scope,
254 fmt_args->GetFormattedOutputChoice());
255 ITERATE(CSearchResultSet, result, *results) {
256 formatter.PrintOneResultSet(**result, query_batch);
257 }
258 }
259 BLAST_PROF_STOP( APP.LOOP.FMT );
260 batch_num++;
261 }
262 BLAST_PROF_START( APP.POST );
263 formatter.PrintEpilog(opt);
264
265 if (m_CmdLineArgs->ProduceDebugOutput()) {
266 opts_hndl->GetOptions().DebugDumpText(NcbiCerr, "BLAST options", 1);
267 }
268
269 LogQueryInfo(m_UsageReport, input);
270 formatter.LogBlastSearchInfo(m_UsageReport);
271 BLAST_PROF_STOP( APP.POST );
272 } CATCH_ALL(status)
273
274 if(!bah.GetMessages().empty()) {
275 const CArgs & a = GetArgs();
276 PrintErrorArchive(a, bah.GetMessages());
277 }
278
279 m_UsageReport.AddParam(CBlastUsageReport::eNumThreads, (int) m_CmdLineArgs->GetNumThreads());
280 m_UsageReport.AddParam(CBlastUsageReport::eExitStatus, status);
281 BLAST_PROF_STOP( APP.MAIN );
282 BLAST_PROF_ADD( THREADS , (int)m_CmdLineArgs->GetNumThreads() );
283 BLAST_PROF_ADD( BATCHES , (int)batch_num );
284 BLAST_PROF_ADD( EXIT_STATUS , (int)status );
285 BLAST_PROF_REPORT ;
286 return status;
287 }
288
x_RunMTBySplitQuery()289 int CBlastnApp::x_RunMTBySplitQuery()
290 {
291 BLAST_PROF_START( APP.MAIN );
292 BLAST_PROF_START( APP.PRE );
293 BLAST_PROF_ADD2( PROGRAM, blastn ) ;
294 int status = BLAST_EXIT_SUCCESS;
295 CBlastAppDiagHandler bah;
296
297 // Allow the fasta reader to complain on invalid sequence input
298 SetDiagPostLevel(eDiag_Warning);
299 SetDiagPostPrefix("blastn");
300 SetDiagHandler(&bah, false);
301
302 try {
303 const CArgs& args = GetArgs();
304 CRef<CBlastOptionsHandle> opts_hndl;
305 if(RecoverSearchStrategy(args, m_CmdLineArgs)) {
306 opts_hndl.Reset(&*m_CmdLineArgs->SetOptionsForSavedStrategy(args));
307 }
308 else {
309 opts_hndl.Reset(&*m_CmdLineArgs->SetOptions(args));
310 }
311 if(IsIStreamEmpty(m_CmdLineArgs->GetInputStream())){
312 ERR_POST(Warning << "Query is Empty!");
313 return BLAST_EXIT_SUCCESS;
314 }
315 CNcbiOstream & out_stream = m_CmdLineArgs->GetOutputStream();
316 const int kMaxNumOfThreads = m_CmdLineArgs->GetNumThreads();
317 CBlastMasterNode master_node(out_stream, kMaxNumOfThreads);
318 int chunk_num = 0;
319 int batch_size = GetMTByQueriesBatchSize(opts_hndl->GetOptions().GetProgram(), kMaxNumOfThreads);
320 INFO_POST("Batch Size: " << batch_size);
321 CBlastNodeInputReader input(m_CmdLineArgs->GetInputStream(), batch_size, 2000);
322 while (master_node.Processing()) {
323 if (!input.AtEOF()) {
324 if (!master_node.IsFull()) {
325 string qb;
326 int q_index = 0;
327 int num_q = input.GetQueryBatch(qb, q_index);
328 if (num_q > 0) {
329 CBlastNodeMailbox * mb(new CBlastNodeMailbox(chunk_num, master_node.GetBuzzer()));
330 CBlastnNode * t(new CBlastnNode(chunk_num, GetArguments(), args, bah, qb, q_index, num_q, mb));
331 master_node.RegisterNode(t, mb);
332 chunk_num ++;
333 }
334 }
335 }
336 else {
337 master_node.Shutdown();
338 }
339 }
340
341 if(chunk_num < kMaxNumOfThreads){
342 CheckMTByQueries_QuerySize(opts_hndl->GetOptions().GetProgram(), batch_size);
343 }
344 } CATCH_ALL (status)
345
346 if(!bah.GetMessages().empty()) {
347 const CArgs & a = GetArgs();
348 PrintErrorArchive(a, bah.GetMessages());
349 }
350 BLAST_PROF_STOP( APP.MAIN );
351 BLAST_PROF_ADD( THREADS , (int)m_CmdLineArgs->GetNumThreads() );
352 BLAST_PROF_ADD( EXIT_STATUS , (int)status );
353 BLAST_PROF_REPORT ;
354
355 return status;
356 }
357
358 #ifndef SKIP_DOXYGEN_PROCESSING
NcbiSys_main(int argc,ncbi::TXChar * argv[])359 int NcbiSys_main(int argc, ncbi::TXChar* argv[])
360 {
361 return CBlastnApp().AppMain(argc, argv);
362 }
363 #endif /* SKIP_DOXYGEN_PROCESSING */
364