1 /*  $Id: blastn_app.cpp 632181 2021-05-27 13:23:25Z ivanov $
2  * ===========================================================================
3  *
4  *                            PUBLIC DOMAIN NOTICE
5  *               National Center for Biotechnology Information
6  *
7  *  This software/database is a "United States Government Work" under the
8  *  terms of the United States Copyright Act.  It was written as part of
9  *  the author's official duties as a United States Government employee and
10  *  thus cannot be copyrighted.  This software/database is freely available
11  *  to the public for use. The National Library of Medicine and the U.S.
12  *  Government have not placed any restriction on its use or reproduction.
13  *
14  *  Although all reasonable efforts have been taken to ensure the accuracy
15  *  and reliability of the software and data, the NLM and the U.S.
16  *  Government do not and cannot warrant the performance or results that
17  *  may be obtained by using this software or data. The NLM and the U.S.
18  *  Government disclaim all warranties, express or implied, including
19  *  warranties of performance, merchantability or fitness for any particular
20  *  purpose.
21  *
22  *  Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors:  Christiam Camacho
27  *
28  */
29 
30 /** @file blastn_app.cpp
31  * BLASTN command line application
32  */
33 
34 #include <ncbi_pch.hpp>
35 #include <corelib/ncbiapp.hpp>
36 #include <algo/blast/api/local_blast.hpp>
37 #include <algo/blast/api/remote_blast.hpp>
38 #include <algo/blast/blastinput/blast_fasta_input.hpp>
39 #include <algo/blast/blastinput/blastn_args.hpp>
40 #include <algo/blast/api/objmgr_query_data.hpp>
41 #include <algo/blast/format/blast_format.hpp>
42 #include <util/profile/rtprofile.hpp>
43 #include "blast_app_util.hpp"
44 #include "blastn_node.hpp"
45 
46 #ifndef SKIP_DOXYGEN_PROCESSING
47 USING_NCBI_SCOPE;
48 USING_SCOPE(blast);
49 USING_SCOPE(objects);
50 #endif
51 
52 class CBlastnApp : public CNcbiApplication
53 {
54 public:
55     /** @inheritDoc */
CBlastnApp()56     CBlastnApp() {
57         CRef<CVersion> version(new CVersion());
58         version->SetVersionInfo(new CBlastVersion());
59         SetFullVersion(version);
60         m_StopWatch.Start();
61         if (m_UsageReport.IsEnabled()) {
62         	m_UsageReport.AddParam(CBlastUsageReport::eVersion, GetVersion().Print());
63         }
64     }
65 
~CBlastnApp()66     ~CBlastnApp() {
67     	m_UsageReport.AddParam(CBlastUsageReport::eRunTime, m_StopWatch.Elapsed());
68     }
69 private:
70     /** @inheritDoc */
71     virtual void Init();
72     /** @inheritDoc */
73     virtual int Run();
74 
75     int x_RunMTBySplitDB();
76     int x_RunMTBySplitQuery();
77 
78     /// This application's command line args
79     CRef<CBlastnAppArgs> m_CmdLineArgs;
80     CBlastUsageReport m_UsageReport;
81     CStopWatch m_StopWatch;
82 };
83 
Init()84 void CBlastnApp::Init()
85 {
86     // formulate command line arguments
87 
88     m_CmdLineArgs.Reset(new CBlastnAppArgs());
89     // read the command line
90     HideStdArgs(fHideLogfile | fHideConffile | fHideFullVersion | fHideXmlHelp | fHideDryRun);
91     SetupArgDescriptions(m_CmdLineArgs->SetCommandLine());
92 }
93 
Run(void)94 int CBlastnApp::Run(void)
95 {
96 	const CArgs& args = GetArgs();
97 	CMTArgs mt_args(args);
98 	if ((mt_args.GetMTMode() == CMTArgs::eSplitByQueries) &&
99 		(mt_args.GetNumThreads() > 1)){
100 		m_UsageReport.AddParam(CBlastUsageReport::eMTMode, CMTArgs::eSplitByQueries);
101 		return x_RunMTBySplitQuery();
102 	}
103 	else {
104 		return x_RunMTBySplitDB();
105 	}
106 }
107 
x_RunMTBySplitDB()108 int CBlastnApp::x_RunMTBySplitDB()
109 {
110     BLAST_PROF_START( APP.MAIN );
111     BLAST_PROF_START( APP.PRE );
112     BLAST_PROF_ADD2( PROGRAM, blastn ) ;
113     int status = BLAST_EXIT_SUCCESS;
114     CBlastAppDiagHandler bah;
115     int batch_num = 0;
116 
117     try {
118 
119         // Allow the fasta reader to complain on invalid sequence input
120         SetDiagPostLevel(eDiag_Warning);
121         SetDiagPostPrefix("blastn");
122         SetDiagHandler(&bah, false);
123 
124         /*** Get the BLAST options ***/
125         const CArgs& args = GetArgs();
126 
127         CRef<CBlastOptionsHandle> opts_hndl;
128         if(RecoverSearchStrategy(args, m_CmdLineArgs)){
129         	opts_hndl.Reset(&*m_CmdLineArgs->SetOptionsForSavedStrategy(args));
130         }
131         else {
132         	opts_hndl.Reset(&*m_CmdLineArgs->SetOptions(args));
133         }
134         const CBlastOptions& opt = opts_hndl->GetOptions();
135 
136         /*** Initialize the database/subject ***/
137         CRef<CBlastDatabaseArgs> db_args(m_CmdLineArgs->GetBlastDatabaseArgs());
138         CRef<CLocalDbAdapter> db_adapter;
139         CRef<CScope> scope;
140         InitializeSubject(db_args, opts_hndl, m_CmdLineArgs->ExecuteRemotely(),
141                          db_adapter, scope);
142         _ASSERT(db_adapter && scope);
143 
144         /*** Get the query sequence(s) ***/
145         CRef<CQueryOptionsArgs> query_opts =
146             m_CmdLineArgs->GetQueryOptionsArgs();
147 
148         SDataLoaderConfig dlconfig =
149             InitializeQueryDataLoaderConfiguration(query_opts->QueryIsProtein(),
150                                                    db_adapter);
151         CBlastInputSourceConfig iconfig(dlconfig, query_opts->GetStrand(),
152                                      query_opts->UseLowercaseMasks(),
153                                      query_opts->GetParseDeflines(),
154                                      query_opts->GetRange());
155         if(IsIStreamEmpty(m_CmdLineArgs->GetInputStream())) {
156            	ERR_POST(Warning << "Query is Empty!");
157            	return BLAST_EXIT_SUCCESS;
158         }
159         CBlastFastaInputSource fasta(m_CmdLineArgs->GetInputStream(), iconfig);
160         CBlastInput input(&fasta);
161 
162         // Initialize the megablast database index now so we can know whether an indexed search will be run.
163         // This is only important for the reference in the report, but would be done anyway.
164         if (opt.GetUseIndex() && !m_CmdLineArgs->ExecuteRemotely()) {
165             CRef<CBlastOptions> my_options(&(opts_hndl->SetOptions()));
166             CSetupFactory::InitializeMegablastDbIndex(my_options);
167         }
168         /*** Get the formatting options ***/
169         CRef<CFormattingArgs> fmt_args(m_CmdLineArgs->GetFormattingArgs());
170         bool isArchiveFormat = fmt_args->ArchiveFormatRequested(args);
171         if(!isArchiveFormat) {
172         	bah.DoNotSaveMessages();
173         }
174         CBlastFormat formatter(opt, *db_adapter,
175                                fmt_args->GetFormattedOutputChoice(),
176                                query_opts->GetParseDeflines(),
177                                m_CmdLineArgs->GetOutputStream(),
178                                fmt_args->GetNumDescriptions(),
179                                fmt_args->GetNumAlignments(),
180                                *scope,
181                                opt.GetMatrixName(),
182                                fmt_args->ShowGis(),
183                                fmt_args->DisplayHtmlOutput(),
184                                opt.GetQueryGeneticCode(),
185                                opt.GetDbGeneticCode(),
186                                opt.GetSumStatisticsMode(),
187                                m_CmdLineArgs->ExecuteRemotely(),
188                                db_adapter->GetFilteringAlgorithm(),
189                                fmt_args->GetCustomOutputFormatSpec(),
190                                m_CmdLineArgs->GetTask() == "megablast",
191                                opt.GetMBIndexLoaded(),
192                                NULL, NULL,
193                                GetCmdlineArgs(GetArguments()),
194 			       GetSubjectFile(args));
195 
196         formatter.SetQueryRange(query_opts->GetRange());
197         formatter.SetLineLength(fmt_args->GetLineLength());
198         formatter.SetHitsSortOption(fmt_args->GetHitsSortOption());
199         formatter.SetHspsSortOption(fmt_args->GetHspsSortOption());
200         formatter.SetCustomDelimiter(fmt_args->GetCustomDelimiter());
201         if(UseXInclude(*fmt_args, args[kArgOutput].AsString())) {
202         	formatter.SetBaseFile(args[kArgOutput].AsString());
203         }
204         formatter.PrintProlog();
205 
206         /*** Process the input ***/
207         CBatchSizeMixer mixer(SplitQuery_GetChunkSize(opt.GetProgram())-1000);
208         int batch_size = m_CmdLineArgs->GetQueryBatchSize();
209         if (batch_size) {
210             input.SetBatchSize(batch_size);
211 	    BLAST_PROF_ADD( BATCH_SIZE, (int)batch_size );
212         } else {
213             Int8 total_len = formatter.GetDbTotalLength();
214             if (total_len > 0) {
215                 /* the optimal hits per batch scales with total db size */
216                 mixer.SetTargetHits(total_len / 3000);
217             }
218             input.SetBatchSize(mixer.GetBatchSize());
219 	    BLAST_PROF_ADD( BATCH_SIZE, (int)mixer.GetBatchSize() );
220         }
221 	BLAST_PROF_STOP( APP.PRE );
222         for (; !input.End(); formatter.ResetScopeHistory(), QueryBatchCleanup() ) {
223 	    BLAST_PROF_START( APP.LOOP.PRE );
224             CRef<CBlastQueryVector> query_batch(input.GetNextSeqBatch(*scope));
225             CRef<IQueryFactory> queries(new CObjMgr_QueryFactory(*query_batch));
226 
227             SaveSearchStrategy(args, m_CmdLineArgs, queries, opts_hndl);
228 
229             CRef<CSearchResultSet> results;
230 
231 	    BLAST_PROF_STOP( APP.LOOP.PRE );
232             if (m_CmdLineArgs->ExecuteRemotely()) {
233                 CRef<CRemoteBlast> rmt_blast =
234                     InitializeRemoteBlast(queries, db_args, opts_hndl,
235                           m_CmdLineArgs->ProduceDebugRemoteOutput(),
236                           m_CmdLineArgs->GetClientId());
237                 results = rmt_blast->GetResultSet();
238             } else {
239 	        BLAST_PROF_START( APP.LOOP.BLAST );
240                 CLocalBlast lcl_blast(queries, opts_hndl, db_adapter);
241                 lcl_blast.SetNumberOfThreads(m_CmdLineArgs->GetNumThreads());
242 		        lcl_blast.SetBatchNumber( batch_num );
243                 results = lcl_blast.Run();
244                 if (!batch_size)
245                     input.SetBatchSize(mixer.GetBatchSize(lcl_blast.GetNumExtensions()));
246 	        BLAST_PROF_STOP( APP.LOOP.BLAST );
247             }
248 	    BLAST_PROF_START( APP.LOOP.FMT );
249             if (isArchiveFormat) {
250                 formatter.WriteArchive(*queries, *opts_hndl, *results, 0, bah.GetMessages());
251                 bah.ResetMessages();
252             } else {
253                 BlastFormatter_PreFetchSequenceData(*results, scope,
254                 			                        fmt_args->GetFormattedOutputChoice());
255                 ITERATE(CSearchResultSet, result, *results) {
256                     formatter.PrintOneResultSet(**result, query_batch);
257                 }
258             }
259 	    BLAST_PROF_STOP( APP.LOOP.FMT );
260 	    batch_num++;
261         }
262         BLAST_PROF_START( APP.POST );
263         formatter.PrintEpilog(opt);
264 
265         if (m_CmdLineArgs->ProduceDebugOutput()) {
266             opts_hndl->GetOptions().DebugDumpText(NcbiCerr, "BLAST options", 1);
267         }
268 
269         LogQueryInfo(m_UsageReport, input);
270         formatter.LogBlastSearchInfo(m_UsageReport);
271         BLAST_PROF_STOP( APP.POST );
272     } CATCH_ALL(status)
273 
274     if(!bah.GetMessages().empty()) {
275     	const CArgs & a = GetArgs();
276     	PrintErrorArchive(a, bah.GetMessages());
277     }
278 
279 	m_UsageReport.AddParam(CBlastUsageReport::eNumThreads, (int) m_CmdLineArgs->GetNumThreads());
280     m_UsageReport.AddParam(CBlastUsageReport::eExitStatus, status);
281     BLAST_PROF_STOP( APP.MAIN );
282     BLAST_PROF_ADD( THREADS , (int)m_CmdLineArgs->GetNumThreads() );
283     BLAST_PROF_ADD( BATCHES , (int)batch_num );
284     BLAST_PROF_ADD( EXIT_STATUS , (int)status );
285     BLAST_PROF_REPORT ;
286     return status;
287 }
288 
x_RunMTBySplitQuery()289 int CBlastnApp::x_RunMTBySplitQuery()
290 {
291     BLAST_PROF_START( APP.MAIN );
292     BLAST_PROF_START( APP.PRE );
293     BLAST_PROF_ADD2( PROGRAM, blastn ) ;
294     int status = BLAST_EXIT_SUCCESS;
295     CBlastAppDiagHandler bah;
296 
297     // Allow the fasta reader to complain on invalid sequence input
298     SetDiagPostLevel(eDiag_Warning);
299     SetDiagPostPrefix("blastn");
300     SetDiagHandler(&bah, false);
301 
302 	try {
303     	const CArgs& args = GetArgs();
304     	CRef<CBlastOptionsHandle> opts_hndl;
305         if(RecoverSearchStrategy(args, m_CmdLineArgs)) {
306         	opts_hndl.Reset(&*m_CmdLineArgs->SetOptionsForSavedStrategy(args));
307         }
308         else {
309         	opts_hndl.Reset(&*m_CmdLineArgs->SetOptions(args));
310         }
311     	if(IsIStreamEmpty(m_CmdLineArgs->GetInputStream())){
312        		ERR_POST(Warning << "Query is Empty!");
313        		return BLAST_EXIT_SUCCESS;
314     	}
315     	CNcbiOstream & out_stream = m_CmdLineArgs->GetOutputStream();
316     	const int kMaxNumOfThreads = m_CmdLineArgs->GetNumThreads();
317 		CBlastMasterNode master_node(out_stream, kMaxNumOfThreads);
318    		int chunk_num = 0;
319    	    int batch_size = GetMTByQueriesBatchSize(opts_hndl->GetOptions().GetProgram(), kMaxNumOfThreads);
320    		INFO_POST("Batch Size: " << batch_size);
321    		CBlastNodeInputReader input(m_CmdLineArgs->GetInputStream(), batch_size, 2000);
322 		while (master_node.Processing()) {
323 			if (!input.AtEOF()) {
324 			 	if (!master_node.IsFull()) {
325 					string qb;
326 					int q_index = 0;
327 					int num_q = input.GetQueryBatch(qb, q_index);
328 					if (num_q > 0) {
329 						CBlastNodeMailbox * mb(new CBlastNodeMailbox(chunk_num, master_node.GetBuzzer()));
330 						CBlastnNode * t(new CBlastnNode(chunk_num, GetArguments(), args, bah, qb, q_index, num_q, mb));
331 						master_node.RegisterNode(t, mb);
332 						chunk_num ++;
333 					}
334 				}
335 			}
336 			else {
337 				master_node.Shutdown();
338 			}
339     	}
340 
341 		if(chunk_num < kMaxNumOfThreads){
342 			CheckMTByQueries_QuerySize(opts_hndl->GetOptions().GetProgram(), batch_size);
343 		}
344 	} CATCH_ALL (status)
345 
346     if(!bah.GetMessages().empty()) {
347     	const CArgs & a = GetArgs();
348     	PrintErrorArchive(a, bah.GetMessages());
349     }
350 	    BLAST_PROF_STOP( APP.MAIN );
351     BLAST_PROF_ADD( THREADS , (int)m_CmdLineArgs->GetNumThreads() );
352     BLAST_PROF_ADD( EXIT_STATUS , (int)status );
353     BLAST_PROF_REPORT ;
354 
355     return status;
356 }
357 
358 #ifndef SKIP_DOXYGEN_PROCESSING
NcbiSys_main(int argc,ncbi::TXChar * argv[])359 int NcbiSys_main(int argc, ncbi::TXChar* argv[])
360 {
361     return CBlastnApp().AppMain(argc, argv);
362 }
363 #endif /* SKIP_DOXYGEN_PROCESSING */
364