/*  $Id: igblastp_app.cpp 556289 2018-01-29 17:33:51Z jianye $
 * ===========================================================================
 *
 *                            PUBLIC DOMAIN NOTICE
 *               National Center for Biotechnology Information
 *
 *  This software/database is a "United States Government Work" under the
 *  terms of the United States Copyright Act.  It was written as part of
 *  the author's official duties as a United States Government employee and
 *  thus cannot be copyrighted.  This software/database is freely available
 *  to the public for use. The National Library of Medicine and the U.S.
 *  Government have not placed any restriction on its use or reproduction.
 *
 *  Although all reasonable efforts have been taken to ensure the accuracy
 *  and reliability of the software and data, the NLM and the U.S.
 *  Government do not and cannot warrant the performance or results that
 *  may be obtained by using this software or data. The NLM and the U.S.
 *  Government disclaim all warranties, express or implied, including
 *  warranties of performance, merchantability or fitness for any particular
 *  purpose.
 *
 *  Please cite the author in any work or product based on this material.
 *
 * ===========================================================================
 *
 * Authors:  Ning Ma
 *
 */
29 
/** @file igblastp_app.cpp
 * IGBLASTP command line application
 */
33 
34 #include <ncbi_pch.hpp>
35 #include <corelib/ncbiapp.hpp>
36 #include <algo/blast/api/local_blast.hpp>
37 #include <algo/blast/api/remote_blast.hpp>
38 #include <algo/blast/blastinput/blast_fasta_input.hpp>
39 #include <algo/blast/blastinput/igblastp_args.hpp>
40 #include <algo/blast/api/objmgr_query_data.hpp>
41 #include <algo/blast/format/blast_format.hpp>
42 #include "../blast/blast_app_util.hpp"
43 
44 #ifndef SKIP_DOXYGEN_PROCESSING
45 USING_NCBI_SCOPE;
46 USING_SCOPE(blast);
47 USING_SCOPE(objects);
48 #endif
49 
50 class CIgBlastpApp : public CNcbiApplication
51 {
52 public:
53     /** @inheritDoc */
CIgBlastpApp()54     CIgBlastpApp() {
55         CRef<CVersion> version(new CVersion());
56         version->SetVersionInfo(new CIgBlastVersion());
57         SetFullVersion(version);
58     }
59 private:
60     /** @inheritDoc */
61     virtual void Init();
62     /** @inheritDoc */
63     virtual int Run();
64 
65     /// This application's command line args
66     CRef<CIgBlastpAppArgs> m_CmdLineArgs;
67 };
68 
Init()69 void CIgBlastpApp::Init()
70 {
71     // formulate command line arguments
72 
73     m_CmdLineArgs.Reset(new CIgBlastpAppArgs());
74 
75     // read the command line
76 
77     HideStdArgs(fHideLogfile | fHideConffile | fHideFullVersion | fHideXmlHelp | fHideDryRun);
78     SetupArgDescriptions(m_CmdLineArgs->SetCommandLine());
79 }
80 
Run(void)81 int CIgBlastpApp::Run(void)
82 {
83     int status = BLAST_EXIT_SUCCESS;
84 
85     try {
86 
87         // Allow the fasta reader to complain on invalid sequence input
88         SetDiagPostLevel(eDiag_Warning);
89 
90         /*** Get the BLAST options ***/
91         const CArgs& args = GetArgs();
92         CRef<CBlastOptionsHandle> opts_hndl;
93 
94         opts_hndl.Reset(&*m_CmdLineArgs->SetOptions(args));
95 
96         const CBlastOptions& opt = opts_hndl->GetOptions();
97 
98         /*** Get the query sequence(s) ***/
99         CRef<CQueryOptionsArgs> query_opts =
100             m_CmdLineArgs->GetQueryOptionsArgs();
101         SDataLoaderConfig dlconfig(query_opts->QueryIsProtein());
102         dlconfig.OptimizeForWholeLargeSequenceRetrieval();
103         CBlastInputSourceConfig iconfig(dlconfig, query_opts->GetStrand(),
104                                      query_opts->UseLowercaseMasks(),
105                                      query_opts->GetParseDeflines(),
106                                      query_opts->GetRange());
107         iconfig.SetQueryLocalIdMode();
108         CBlastFastaInputSource fasta(m_CmdLineArgs->GetInputStream(), iconfig);
109         CBlastInput input(&fasta, m_CmdLineArgs->GetQueryBatchSize());
110 
111         /*** Initialize igblast database/subject and options ***/
112         CRef<CIgBlastArgs> ig_args(m_CmdLineArgs->GetIgBlastArgs());
113         CRef<CIgBlastOptions> ig_opts(ig_args->GetIgBlastOptions());
114 
115         /*** Initialize the database/subject ***/
116         bool db_is_remote = true;
117         CRef<CScope> scope;
118         CRef<CLocalDbAdapter> blastdb;
119         CRef<CLocalDbAdapter> blastdb_full;
120         CRef<CBlastDatabaseArgs> db_args(m_CmdLineArgs->GetBlastDatabaseArgs());
121         if (db_args->GetDatabaseName() == kEmptyStr &&
122             db_args->GetSubjects().Empty()) {
123             blastdb.Reset(&(*(ig_opts->m_Db[0])));
124             scope.Reset(new CScope(*CObjectManager::GetInstance()));
125             db_is_remote = false;
126             blastdb_full.Reset(&(*blastdb));
127         } else {
128             InitializeSubject(db_args, opts_hndl, m_CmdLineArgs->ExecuteRemotely(),
129                               blastdb, scope);
130             if (m_CmdLineArgs->ExecuteRemotely()) {
131                 blastdb_full.Reset(&(*blastdb));
132             } else {
133                 CSearchDatabase sdb(ig_opts->m_Db[0]->GetDatabaseName() + " " +
134                        blastdb->GetDatabaseName(),
135                        CSearchDatabase::eBlastDbIsProtein);
136                 blastdb_full.Reset(new CLocalDbAdapter(sdb));
137             }
138         }
139         _ASSERT(blastdb && scope);
140 
141         // TODO: whose priority is higher?
142         ig_args->AddIgSequenceScope(scope);
143 
144         /*** Get the formatting options ***/
145         CRef<CFormattingArgs> fmt_args(m_CmdLineArgs->GetFormattingArgs());
146         Int4 num_alignments = (db_args->GetDatabaseName() == kEmptyStr) ?
147                                0 : fmt_args->GetNumAlignments();
148         CBlastFormat formatter(opt, *blastdb_full,
149                                fmt_args->GetFormattedOutputChoice(),
150                                query_opts->GetParseDeflines(),
151                                m_CmdLineArgs->GetOutputStream(),
152                                fmt_args->GetNumDescriptions(),
153                                num_alignments,
154                                *scope,
155                                opt.GetMatrixName(),
156                                fmt_args->ShowGis(),
157                                fmt_args->DisplayHtmlOutput(),
158                                opt.GetQueryGeneticCode(),
159                                opt.GetDbGeneticCode(),
160                                opt.GetSumStatisticsMode(),
161                                false,
162                                blastdb->GetFilteringAlgorithm(),
163                                fmt_args->GetCustomOutputFormatSpec(),
164                                false,
165                                false,
166                                &*ig_opts);
167 
168 
169         //formatter.PrintProlog();
170         if(fmt_args->GetFormattedOutputChoice() ==
171            CFormattingArgs::eFlatQueryAnchoredIdentities ||
172            fmt_args->GetFormattedOutputChoice() ==
173            CFormattingArgs::eFlatQueryAnchoredNoIdentities){
174             if(blastdb_full->GetDatabaseName() != NcbiEmptyString){
175                 vector<CBlastFormatUtil::SDbInfo> db_info;
176                 CBlastFormatUtil::GetBlastDbInfo(db_info, blastdb_full->GetDatabaseName(),
177                                                  ig_opts->m_IsProtein, -1, false);
178                 CBlastFormatUtil::PrintDbReport(db_info, 68, m_CmdLineArgs->GetOutputStream(), true);
179             }
180         }
181         /*** Process the input ***/
182         for (; !input.End(); formatter.ResetScopeHistory()) {
183 
184             CRef<CBlastQueryVector> query(input.GetNextSeqBatch(*scope));
185 
186             //SaveSearchStrategy(args, m_CmdLineArgs, queries, opts_hndl);
187             CRef<CSearchResultSet> results;
188 
189             if (m_CmdLineArgs->ExecuteRemotely() && db_is_remote) {
190                 CIgBlast rmt_blast(query,
191                                    db_args->GetSearchDatabase(),
192                                    db_args->GetSubjects(),
193                                    opts_hndl, ig_opts,
194                                    NcbiEmptyString, scope);
195                 //TODO:          m_CmdLineArgs->ProduceDebugRemoteOutput(),
196                 //TODO:          m_CmdLineArgs->GetClientId());
197                 results = rmt_blast.Run();
198             } else {
199                 CIgBlast lcl_blast(query, blastdb, opts_hndl, ig_opts, scope);
200                 lcl_blast.SetNumberOfThreads(m_CmdLineArgs->GetNumThreads());
201                 results = lcl_blast.Run();
202             }
203 
204             /* TODO should we support archive format?
205             if (fmt_args->ArchiveFormatRequested(args)) {
206                 CRef<IQueryFactory> qf(new CObjMgr_QueryFactory(*query));
207                 formatter.WriteArchive(*qf, *opts_hndl, *results);
208             } else {
209             */
210             BlastFormatter_PreFetchSequenceData(*results, scope,
211             									fmt_args->GetFormattedOutputChoice());
212             ITERATE(CSearchResultSet, result, *results) {
213                 CBlastFormat::SClone clone_info;
214                 CIgBlastResults &ig_result = *const_cast<CIgBlastResults *>
215                         (dynamic_cast<const CIgBlastResults *>(&(**result)));
216                 formatter.PrintOneResultSet(ig_result, query, clone_info, false, false);
217             }
218         }
219 
220         formatter.PrintEpilog(opt);
221 
222         if (m_CmdLineArgs->ProduceDebugOutput()) {
223             opts_hndl->GetOptions().DebugDumpText(NcbiCerr, "BLAST options", 1);
224         }
225 
226     } CATCH_ALL(status)
227     return status;
228 }
229 
230 #ifndef SKIP_DOXYGEN_PROCESSING
main(int argc,const char * argv[])231 int main(int argc, const char* argv[] /*, const char* envp[]*/)
232 {
233     return CIgBlastpApp().AppMain(argc, argv, 0, eDS_Default, "");
234 }
235 #endif /* SKIP_DOXYGEN_PROCESSING */
236