1 /* $Id: igblastp_app.cpp 556289 2018-01-29 17:33:51Z jianye $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Authors: Ning Ma
27 *
28 */
29
30 /** @file igblastp_app.cpp
31 * IGBLASTP command line application
32 */
33
34 #include <ncbi_pch.hpp>
35 #include <corelib/ncbiapp.hpp>
36 #include <algo/blast/api/local_blast.hpp>
37 #include <algo/blast/api/remote_blast.hpp>
38 #include <algo/blast/blastinput/blast_fasta_input.hpp>
39 #include <algo/blast/blastinput/igblastp_args.hpp>
40 #include <algo/blast/api/objmgr_query_data.hpp>
41 #include <algo/blast/format/blast_format.hpp>
42 #include "../blast/blast_app_util.hpp"
43
44 #ifndef SKIP_DOXYGEN_PROCESSING
45 USING_NCBI_SCOPE;
46 USING_SCOPE(blast);
47 USING_SCOPE(objects);
48 #endif
49
50 class CIgBlastpApp : public CNcbiApplication
51 {
52 public:
53 /** @inheritDoc */
CIgBlastpApp()54 CIgBlastpApp() {
55 CRef<CVersion> version(new CVersion());
56 version->SetVersionInfo(new CIgBlastVersion());
57 SetFullVersion(version);
58 }
59 private:
60 /** @inheritDoc */
61 virtual void Init();
62 /** @inheritDoc */
63 virtual int Run();
64
65 /// This application's command line args
66 CRef<CIgBlastpAppArgs> m_CmdLineArgs;
67 };
68
Init()69 void CIgBlastpApp::Init()
70 {
71 // formulate command line arguments
72
73 m_CmdLineArgs.Reset(new CIgBlastpAppArgs());
74
75 // read the command line
76
77 HideStdArgs(fHideLogfile | fHideConffile | fHideFullVersion | fHideXmlHelp | fHideDryRun);
78 SetupArgDescriptions(m_CmdLineArgs->SetCommandLine());
79 }
80
Run(void)81 int CIgBlastpApp::Run(void)
82 {
83 int status = BLAST_EXIT_SUCCESS;
84
85 try {
86
87 // Allow the fasta reader to complain on invalid sequence input
88 SetDiagPostLevel(eDiag_Warning);
89
90 /*** Get the BLAST options ***/
91 const CArgs& args = GetArgs();
92 CRef<CBlastOptionsHandle> opts_hndl;
93
94 opts_hndl.Reset(&*m_CmdLineArgs->SetOptions(args));
95
96 const CBlastOptions& opt = opts_hndl->GetOptions();
97
98 /*** Get the query sequence(s) ***/
99 CRef<CQueryOptionsArgs> query_opts =
100 m_CmdLineArgs->GetQueryOptionsArgs();
101 SDataLoaderConfig dlconfig(query_opts->QueryIsProtein());
102 dlconfig.OptimizeForWholeLargeSequenceRetrieval();
103 CBlastInputSourceConfig iconfig(dlconfig, query_opts->GetStrand(),
104 query_opts->UseLowercaseMasks(),
105 query_opts->GetParseDeflines(),
106 query_opts->GetRange());
107 iconfig.SetQueryLocalIdMode();
108 CBlastFastaInputSource fasta(m_CmdLineArgs->GetInputStream(), iconfig);
109 CBlastInput input(&fasta, m_CmdLineArgs->GetQueryBatchSize());
110
111 /*** Initialize igblast database/subject and options ***/
112 CRef<CIgBlastArgs> ig_args(m_CmdLineArgs->GetIgBlastArgs());
113 CRef<CIgBlastOptions> ig_opts(ig_args->GetIgBlastOptions());
114
115 /*** Initialize the database/subject ***/
116 bool db_is_remote = true;
117 CRef<CScope> scope;
118 CRef<CLocalDbAdapter> blastdb;
119 CRef<CLocalDbAdapter> blastdb_full;
120 CRef<CBlastDatabaseArgs> db_args(m_CmdLineArgs->GetBlastDatabaseArgs());
121 if (db_args->GetDatabaseName() == kEmptyStr &&
122 db_args->GetSubjects().Empty()) {
123 blastdb.Reset(&(*(ig_opts->m_Db[0])));
124 scope.Reset(new CScope(*CObjectManager::GetInstance()));
125 db_is_remote = false;
126 blastdb_full.Reset(&(*blastdb));
127 } else {
128 InitializeSubject(db_args, opts_hndl, m_CmdLineArgs->ExecuteRemotely(),
129 blastdb, scope);
130 if (m_CmdLineArgs->ExecuteRemotely()) {
131 blastdb_full.Reset(&(*blastdb));
132 } else {
133 CSearchDatabase sdb(ig_opts->m_Db[0]->GetDatabaseName() + " " +
134 blastdb->GetDatabaseName(),
135 CSearchDatabase::eBlastDbIsProtein);
136 blastdb_full.Reset(new CLocalDbAdapter(sdb));
137 }
138 }
139 _ASSERT(blastdb && scope);
140
141 // TODO: whose priority is higher?
142 ig_args->AddIgSequenceScope(scope);
143
144 /*** Get the formatting options ***/
145 CRef<CFormattingArgs> fmt_args(m_CmdLineArgs->GetFormattingArgs());
146 Int4 num_alignments = (db_args->GetDatabaseName() == kEmptyStr) ?
147 0 : fmt_args->GetNumAlignments();
148 CBlastFormat formatter(opt, *blastdb_full,
149 fmt_args->GetFormattedOutputChoice(),
150 query_opts->GetParseDeflines(),
151 m_CmdLineArgs->GetOutputStream(),
152 fmt_args->GetNumDescriptions(),
153 num_alignments,
154 *scope,
155 opt.GetMatrixName(),
156 fmt_args->ShowGis(),
157 fmt_args->DisplayHtmlOutput(),
158 opt.GetQueryGeneticCode(),
159 opt.GetDbGeneticCode(),
160 opt.GetSumStatisticsMode(),
161 false,
162 blastdb->GetFilteringAlgorithm(),
163 fmt_args->GetCustomOutputFormatSpec(),
164 false,
165 false,
166 &*ig_opts);
167
168
169 //formatter.PrintProlog();
170 if(fmt_args->GetFormattedOutputChoice() ==
171 CFormattingArgs::eFlatQueryAnchoredIdentities ||
172 fmt_args->GetFormattedOutputChoice() ==
173 CFormattingArgs::eFlatQueryAnchoredNoIdentities){
174 if(blastdb_full->GetDatabaseName() != NcbiEmptyString){
175 vector<CBlastFormatUtil::SDbInfo> db_info;
176 CBlastFormatUtil::GetBlastDbInfo(db_info, blastdb_full->GetDatabaseName(),
177 ig_opts->m_IsProtein, -1, false);
178 CBlastFormatUtil::PrintDbReport(db_info, 68, m_CmdLineArgs->GetOutputStream(), true);
179 }
180 }
181 /*** Process the input ***/
182 for (; !input.End(); formatter.ResetScopeHistory()) {
183
184 CRef<CBlastQueryVector> query(input.GetNextSeqBatch(*scope));
185
186 //SaveSearchStrategy(args, m_CmdLineArgs, queries, opts_hndl);
187 CRef<CSearchResultSet> results;
188
189 if (m_CmdLineArgs->ExecuteRemotely() && db_is_remote) {
190 CIgBlast rmt_blast(query,
191 db_args->GetSearchDatabase(),
192 db_args->GetSubjects(),
193 opts_hndl, ig_opts,
194 NcbiEmptyString, scope);
195 //TODO: m_CmdLineArgs->ProduceDebugRemoteOutput(),
196 //TODO: m_CmdLineArgs->GetClientId());
197 results = rmt_blast.Run();
198 } else {
199 CIgBlast lcl_blast(query, blastdb, opts_hndl, ig_opts, scope);
200 lcl_blast.SetNumberOfThreads(m_CmdLineArgs->GetNumThreads());
201 results = lcl_blast.Run();
202 }
203
204 /* TODO should we support archive format?
205 if (fmt_args->ArchiveFormatRequested(args)) {
206 CRef<IQueryFactory> qf(new CObjMgr_QueryFactory(*query));
207 formatter.WriteArchive(*qf, *opts_hndl, *results);
208 } else {
209 */
210 BlastFormatter_PreFetchSequenceData(*results, scope,
211 fmt_args->GetFormattedOutputChoice());
212 ITERATE(CSearchResultSet, result, *results) {
213 CBlastFormat::SClone clone_info;
214 CIgBlastResults &ig_result = *const_cast<CIgBlastResults *>
215 (dynamic_cast<const CIgBlastResults *>(&(**result)));
216 formatter.PrintOneResultSet(ig_result, query, clone_info, false, false);
217 }
218 }
219
220 formatter.PrintEpilog(opt);
221
222 if (m_CmdLineArgs->ProduceDebugOutput()) {
223 opts_hndl->GetOptions().DebugDumpText(NcbiCerr, "BLAST options", 1);
224 }
225
226 } CATCH_ALL(status)
227 return status;
228 }
229
230 #ifndef SKIP_DOXYGEN_PROCESSING
main(int argc,const char * argv[])231 int main(int argc, const char* argv[] /*, const char* envp[]*/)
232 {
233 return CIgBlastpApp().AppMain(argc, argv, 0, eDS_Default, "");
234 }
235 #endif /* SKIP_DOXYGEN_PROCESSING */
236