1 /* $Id: blastdbcmd.cpp 631510 2021-05-19 13:47:40Z ivanov $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Christiam Camacho
27 *
28 */
29
30 /** @file blastdbcmd.cpp
31 * Command line tool to examine the contents of BLAST databases. This is the
32 * successor to fastacmd from the C toolkit
33 */
34
35 #include <ncbi_pch.hpp>
36 #include <corelib/ncbiapp.hpp>
37 #include <algo/blast/api/version.hpp>
38 #include <objtools/blast/seqdb_reader/seqdbexpert.hpp>
39 #include <objtools/blast/seqdb_reader/impl/seqdbtax.hpp>
40 #include <algo/blast/api/blast_exception.hpp>
41 #include <algo/blast/blastinput/blast_input_aux.hpp>
42 #include <objtools/blast/blastdb_format/seq_formatter.hpp>
43 #include <objtools/blast/blastdb_format/blastdb_formatter.hpp>
44 #include <objtools/blast/blastdb_format/blastdb_seqid.hpp>
45 #include <algo/blast/blastinput/blast_input.hpp>
46 #include <objects/seqloc/PDB_seq_id.hpp>
47 #include "../blast/blast_app_util.hpp"
48 #include <iomanip>
49
50
51 #ifndef SKIP_DOXYGEN_PROCESSING
52 USING_NCBI_SCOPE;
53 USING_SCOPE(blast);
54 #endif
55
56 static const string NA = "N/A";
57
58 /// The application class
59 class CBlastDBCmdApp : public CNcbiApplication
60 {
61 public:
62 /** @inheritDoc */
CBlastDBCmdApp()63 CBlastDBCmdApp() {
64 CRef<CVersion> version(new CVersion());
65 version->SetVersionInfo(new CBlastVersion());
66 SetFullVersion(version);
67 m_StopWatch.Start();
68 if (m_UsageReport.IsEnabled()) {
69 m_UsageReport.AddParam(CBlastUsageReport::eVersion, GetVersion().Print());
70 m_UsageReport.AddParam(CBlastUsageReport::eProgram, (string) "blastdbcmd");
71 }
72 }
~CBlastDBCmdApp()73 ~CBlastDBCmdApp() {
74 m_UsageReport.AddParam(CBlastUsageReport::eRunTime, m_StopWatch.Elapsed());
75 }
76 private:
77 /** @inheritDoc */
78 virtual void Init();
79 /** @inheritDoc */
80 virtual int Run();
81
82 /// Handle to BLAST database
83 CRef<CSeqDBExpert> m_BlastDb;
84 /// Is the database protein
85 bool m_DbIsProtein;
86 /// output is FASTA
87 bool m_FASTA;
88 /// output is ASN.1 defline
89 bool m_Asn1Bioseq;
90 /// should we find duplicate entries?
91 bool m_GetDuplicates;
92 /// should we output target sequence only?
93 bool m_TargetOnly;
94
95 CBlastDB_FormatterConfig m_Config;
96
97 set<TTaxId> m_TaxIdList;
98
99 CBlastUsageReport m_UsageReport;
100 CStopWatch m_StopWatch;
101
102 /// Initializes Blast DB
103 void x_InitBlastDB();
104 void x_InitBlastDB_TaxIdList();
105
106 string x_InitSearchRequest();
107
108 /// Prints the BLAST database information (e.g.: handles -info command line
109 /// option)
110 void x_PrintBlastDatabaseInformation();
111
112 /// Processes all requests except printing the BLAST database information
113 /// @return 0 on success; 1 if some sequences were not retrieved
114 int x_ProcessSearchRequest();
115
116 /// Process batch entry with range, strand and filter id
117 /// @param args program input args
118 /// @param seq_fmt sequence formatter object
119 /// @return 0 on sucess; 1 if some queries were not processed
120 int x_ProcessBatchEntry(CBlastDB_Formatter & seq_fmt);
121
122 int x_ProcessBatchEntry_NoDup(CBlastDB_Formatter & fmt);
123
124 /// Process entry with range, strand and filter id
125 /// @param args program input args
126 /// @param seq_fmt sequence formatter object
127 /// @return 0 on sucess; 1 if some queries were not processed
128 int x_ProcessEntry(CBlastDB_Formatter & fmt);
129
130 int x_ProcessTaxIdList(CBlastDB_Formatter & fmt);
131
132 int x_ProcessSearchType(CBlastDB_Formatter & fmt);
133
134 bool x_GetOids(const string & acc, vector<int> & oids);
135
136 int x_ModifyConfigForBatchEntry(const string & config);
137
138 bool x_UseLongSeqIds();
139
140 void x_PrintBlastDatabaseTaxInformation();
141
142 int x_ProcessBatchPig(CBlastDB_Formatter & fmt);
143
144 void x_AddCmdOptions();
145 };
146
147
/// Normalizes a user supplied identifier before it is looked up in a
/// version 5 BLAST database.
///
/// Identifiers containing '|' or '_' are parsed as a CSeq_id. When parsing
/// succeeds, PIR and PRF ids are returned as AsFastaString(), PDB ids as
/// GetSeqIdString(), and all other ids as GetSeqIdString(true). Every other
/// identifier (including one that fails to parse) is returned upper-cased.
///
/// @param id identifier as typed by the user
/// @return the identifier in the form used for the database lookup
string s_PreProcessAccessionsForDBv5(const string & id)
{
    if ((id.find('|') != NPOS) || (id.find('_') != NPOS)) {

        CRef<CSeq_id> seqid;
        try {
            seqid = new CSeq_id(id, CSeq_id::fParse_RawText | CSeq_id::fParse_AnyLocal | CSeq_id::fParse_PartialOK);
        }
        catch(...) {
            // Deliberate best effort: an identifier that cannot be parsed is
            // handled below as plain text.
        }

        if (seqid.NotEmpty()) {
            if (seqid->IsPir() || seqid->IsPrf()) {
                return seqid->AsFastaString();
            }
            if (seqid->IsPdb()) {
                // The string used to be split at offset 4 and glued back
                // together, which yields the same text but throws
                // std::out_of_range when it is shorter than 4 characters.
                return seqid->GetSeqIdString();
            }
            return seqid->GetSeqIdString(true);
        }
    }

    string rv = id;
    return NStr::ToUpper(rv);
}
177
178
179 bool
x_GetOids(const string & id,vector<int> & oids)180 CBlastDBCmdApp::x_GetOids(const string & id, vector<int> & oids)
181 {
182 string acc = id;
183 if(m_BlastDb->GetBlastDbVersion() == EBlastDbVersion::eBDB_Version5) {
184 acc = s_PreProcessAccessionsForDBv5(id);
185 }
186 TGi num_id = NStr::StringToNumeric<TGi>(acc, NStr::fConvErr_NoThrow);
187 if(!errno) {
188 int gi_oid = -1;
189 m_BlastDb->GiToOidwFilterCheck(num_id, gi_oid);
190 if(gi_oid < 0) {
191 m_BlastDb->AccessionToOids(acc, oids);
192 }
193 else {
194 oids.push_back(gi_oid);
195 }
196
197 }
198 else {
199 m_BlastDb->AccessionToOids(acc, oids);
200 }
201 if(oids.empty()) {
202 ERR_POST(Error << "Entry not found: " << acc);
203 return false;
204 }
205 return true;
206 }
207
208 int
x_ProcessEntry(CBlastDB_Formatter & fmt)209 CBlastDBCmdApp::x_ProcessEntry(CBlastDB_Formatter & fmt)
210 {
211 unsigned int err_found = 0;
212 const CArgs& args = GetArgs();
213 _ASSERT(m_BlastDb.NotEmpty());
214
215 if (args["ipg"].HasValue()) {
216 CSeqDB::TOID oid;
217 m_BlastDb->PigToOid(args["ipg"].AsInteger(),oid);
218 fmt.Write(oid, m_Config);
219 } else if (args["entry"].HasValue()) {
220 static const string kDelim(",");
221 const string& entry = args["entry"].AsString();
222
223 vector<string> queries;
224 if (entry.find(kDelim[0]) != string::npos) {
225 NStr::Split(entry, kDelim, queries);
226 } else {
227 queries.resize(1);
228 queries[0] = entry;
229 }
230 for(unsigned int i=0; i < queries.size(); i++) {
231 vector<CSeqDB::TOID> oids;
232 if(x_GetOids(queries[i], oids)) {
233 for(unsigned int j=0; j < oids.size(); j++) {
234 if(m_TargetOnly) {
235 fmt.Write(oids[j], m_Config, queries[i]);
236 }
237 else {
238 fmt.Write(oids[j], m_Config);
239 }
240 }
241 }
242 else {
243 err_found ++;
244 }
245 }
246 if(err_found == queries.size()) {
247 NCBI_THROW(CInputException, eInvalidInput,
248 "Entry or entries not found in BLAST database");
249 }
250 }
251 return (err_found) ? 1:0;
252 }
253
s_IsMaskAlgoIdValid(CSeqDB & blastdb,int id)254 bool s_IsMaskAlgoIdValid(CSeqDB & blastdb, int id)
255 {
256 if (id >= 0) {
257 vector<int> algo_id(1, id);
258 vector<int> invalid_algo_ids = blastdb.ValidateMaskAlgorithms(algo_id);
259 if ( !invalid_algo_ids.empty()) {
260 ERR_POST(Error << "Invalid filtering algorithm ID: " << NStr::IntToString(id));
261 return false;
262 }
263 }
264 return true;
265 }
266
x_ModifyConfigForBatchEntry(const string & format)267 int CBlastDBCmdApp::x_ModifyConfigForBatchEntry(const string & format)
268 {
269 int status = 0;
270 if (!m_DbIsProtein) {
271 m_Config.m_Strand = eNa_strand_plus;
272 }
273 m_Config.m_SeqRange = TSeqRange::GetEmpty();
274 m_Config.m_FiltAlgoId = -1;
275 if(!format.empty()) {
276 vector<string> tmp;
277 NStr::Split(format, " \t", tmp, NStr::fSplit_MergeDelimiters | NStr::fSplit_Truncate);
278 for(unsigned int i=0; i < tmp.size(); i++) {
279 if(tmp[i].find('-')!= string::npos) {
280 try {
281 m_Config.m_SeqRange = ParseSequenceRangeOpenEnd(tmp[i]);
282 } catch (...) {
283 }
284 }
285 else if (!m_DbIsProtein && NStr::EqualNocase(tmp[i].c_str(), "minus")) {
286 m_Config.m_Strand = eNa_strand_minus;
287 }
288 else {
289 m_Config.m_FiltAlgoId = NStr::StringToNonNegativeInt(tmp[i]);
290 if(!s_IsMaskAlgoIdValid(*m_BlastDb, m_Config.m_FiltAlgoId)){
291 status = 1;
292 }
293 }
294 }
295 }
296 return status;
297 }
298
299 int
x_ProcessTaxIdList(CBlastDB_Formatter & fmt)300 CBlastDBCmdApp::x_ProcessTaxIdList(CBlastDB_Formatter & fmt)
301 {
302 vector<blastdb::TOid> oids;
303 m_BlastDb->TaxIdsToOids(m_TaxIdList, oids);
304 if(oids.size() == 0) {
305 ERR_POST (Error << "No seq found in db for taxonomy list");
306 return 1;
307 }
308 for(unsigned i=0; i < oids.size(); i++) {
309 fmt.Write(oids[i], m_Config);
310 }
311 return 0;
312 }
313
314
315 void
x_InitBlastDB_TaxIdList()316 CBlastDBCmdApp::x_InitBlastDB_TaxIdList()
317 {
318 const CArgs& args = GetArgs();
319 vector<string> ids;
320 if(args[kArgTaxIdList].HasValue()) {
321 string input = args[kArgTaxIdList].AsString();
322 NStr::Split(input, ",", ids);
323 }
324 else {
325 CNcbiIstream& input = args[kArgTaxIdListFile].AsInputFile();
326 while (input) {
327 string line;
328 NcbiGetlineEOL(input, line);
329 if ( !line.empty() ) {
330 ids.push_back(line);
331 }
332 }
333 }
334 for(unsigned int i=0; i < ids.size(); i++) {
335 m_TaxIdList.insert(NStr::StringToNumeric<TTaxId>(ids[i], NStr::fAllowLeadingSpaces | NStr::fAllowTrailingSpaces));
336 }
337
338 CSeqDB::ESeqType seqtype = ParseMoleculeTypeString(args[kArgDbType].AsString());
339 m_DbIsProtein = static_cast<bool>(seqtype == CSeqDB::eProtein);
340 m_TargetOnly = args["target_only"];
341 if(m_TargetOnly) {
342 CRef<CSeqDBGiList> taxid_list(new CSeqDBGiList());
343 taxid_list->AddTaxIds(m_TaxIdList);
344 m_BlastDb.Reset(new CSeqDBExpert(args[kArgDb].AsString(), seqtype, taxid_list.GetPointer()));
345 }
346 else {
347 m_BlastDb.Reset(new CSeqDBExpert(args[kArgDb].AsString(), seqtype));
348 }
349 }
350
351
352 int
x_ProcessBatchEntry_NoDup(CBlastDB_Formatter & fmt)353 CBlastDBCmdApp::x_ProcessBatchEntry_NoDup(CBlastDB_Formatter & fmt)
354 {
355 int err_found = 0;
356 const CArgs& args = GetArgs();
357 CNcbiIstream& input = args["entry_batch"].AsInputFile();
358 vector<string> ids, formats;
359 vector<CSeqDB::TOID> oids;
360 while (input) {
361 string line;
362 NcbiGetlineEOL(input, line);
363 if ( !line.empty() ) {
364 string id, format;
365 NStr::SplitInTwo(line, " \t", id, format, NStr::fSplit_MergeDelimiters | NStr::fSplit_Truncate);
366 if(id.empty()) {
367 continue;
368 }
369 ids.push_back(id);
370 formats.push_back(format);
371 }
372 }
373
374 if(m_BlastDb->GetBlastDbVersion() == EBlastDbVersion::eBDB_Version5) {
375 for(unsigned int i=0; i < ids.size(); i++) {
376 ids[i] = s_PreProcessAccessionsForDBv5(ids[i]);
377 }
378 }
379 try {
380 m_BlastDb->AccessionsToOids(ids, oids);
381 }
382 catch (CSeqDBException & e) {
383 if (e.GetMsg().find("DB contains no accession info") == NPOS){
384 NCBI_RETHROW_SAME(e, e.GetMsg());
385 }
386 }
387 for(unsigned i=0; i < ids.size(); i++) {
388 if(oids[i] == kSeqDBEntryNotFound) {
389 TGi num_id = NStr::StringToNumeric<TGi>(ids[i], NStr::fConvErr_NoThrow);
390 if(!errno) {
391 int gi_oid = -1;
392 m_BlastDb->GiToOidwFilterCheck(num_id, gi_oid);
393 if(gi_oid >= 0) {
394 oids[i] = gi_oid;
395 }
396 }
397 if(oids[i] == kSeqDBEntryNotFound) {
398 err_found ++;
399 ERR_POST (Error << "Skipped " << ids[i]);
400 continue;
401 }
402 }
403 if(x_ModifyConfigForBatchEntry(formats[i])) {
404 err_found ++;
405 ERR_POST (Error << "Skipped " << ids[i]);
406 continue;
407 }
408 if(m_TargetOnly) {
409 fmt.Write(oids[i], m_Config, ids[i]);
410 }
411 else {
412 fmt.Write(oids[i], m_Config);
413 }
414 }
415 return (err_found) ? 1 : 0;
416 }
417
418 int
x_ProcessBatchEntry(CBlastDB_Formatter & fmt)419 CBlastDBCmdApp::x_ProcessBatchEntry(CBlastDB_Formatter & fmt)
420 {
421 int err_found = 0;
422 const CArgs& args = GetArgs();
423 CNcbiIstream& input = args["entry_batch"].AsInputFile();
424
425 while (input) {
426 string line;
427 NcbiGetlineEOL(input, line);
428 if ( !line.empty() ) {
429 string id, format;
430 NStr::SplitInTwo(line, " \t", id, format, NStr::fSplit_MergeDelimiters | NStr::fSplit_Truncate);
431 if(id.empty()) {
432 continue;
433 }
434 if(x_ModifyConfigForBatchEntry(format)) {
435 err_found ++;
436 ERR_POST (Error << "Skipped " << id);
437 continue;
438 }
439 vector<int> oids;
440 if(!x_GetOids(id, oids)) {
441 err_found ++;
442 ERR_POST (Error << "Skipped " << id);
443 continue;
444 }
445
446 if (m_GetDuplicates) {
447 for(unsigned int j=0; j < oids.size(); j++) {
448 fmt.Write(oids[j], m_Config);
449 }
450 }
451 else {
452 if(m_TargetOnly) {
453 fmt.Write(oids[0], m_Config, id);
454 }
455 else {
456 fmt.Write(oids[0], m_Config);
457 }
458 }
459 }
460 }
461 return (err_found) ? 1 : 0;
462 }
463
464
465 int
x_ProcessBatchPig(CBlastDB_Formatter & fmt)466 CBlastDBCmdApp::x_ProcessBatchPig(CBlastDB_Formatter & fmt)
467 {
468 int err_found = 0;
469 const CArgs& args = GetArgs();
470 CNcbiIstream& input = args["ipg_batch"].AsInputFile();
471
472 while (input) {
473 string line;
474 NcbiGetlineEOL(input, line);
475 if ( !line.empty() ) {
476 string id, format;
477 NStr::SplitInTwo(line, " \t", id, format, NStr::fSplit_MergeDelimiters | NStr::fSplit_Truncate);
478 if(id.empty()) {
479 continue;
480 }
481 if(x_ModifyConfigForBatchEntry(format)) {
482 err_found ++;
483 ERR_POST (Error << "Skipped IPG : " << id);
484 continue;
485 }
486 int oid;
487 int pig = NStr::StringToInt(id, NStr::fConvErr_NoThrow );
488 m_BlastDb->PigToOid(pig,oid);
489 if (oid == -1) {
490 err_found ++;
491 ERR_POST (Error << "Skipped IPG: " << id);
492 continue;
493 }
494
495 fmt.Write(oid, m_Config);
496 }
497 }
498 return (err_found) ? 1 : 0;
499 }
500
501 void
x_InitBlastDB()502 CBlastDBCmdApp::x_InitBlastDB()
503 {
504 const CArgs& args = GetArgs();
505
506 CSeqDB::ESeqType seqtype = ParseMoleculeTypeString(args[kArgDbType].AsString());
507 m_BlastDb.Reset(new CSeqDBExpert(args[kArgDb].AsString(), seqtype));
508 m_DbIsProtein = static_cast<bool>(m_BlastDb->GetSequenceType() == CSeqDB::eProtein);
509 }
510
511 void
x_PrintBlastDatabaseInformation()512 CBlastDBCmdApp::x_PrintBlastDatabaseInformation()
513 {
514 _ASSERT(m_BlastDb.NotEmpty());
515 static const NStr::TNumToStringFlags kFlags = NStr::fWithCommas;
516 const string kLetters = m_DbIsProtein ? "residues" : "bases";
517 const string kVersion = (m_BlastDb->GetBlastDbVersion() == EBlastDbVersion::eBDB_Version5) ? "5":"4";
518 const CArgs& args = GetArgs();
519
520 CNcbiOstream& out = args[kArgOutput].AsOutputFile();
521
522 // Print basic database information
523 out << "Database: " << m_BlastDb->GetTitle() << endl
524 << "\t" << NStr::IntToString(m_BlastDb->GetNumSeqs(), kFlags)
525 << " sequences; ";
526 if(args["exact_length"])
527 out << NStr::UInt8ToString(m_BlastDb->GetExactTotalLength(), kFlags);
528 else
529 out << NStr::UInt8ToString(m_BlastDb->GetTotalLength(), kFlags);
530 out << " total " << kLetters << endl << endl
531 << "Date: " << m_BlastDb->GetDate()
532 << "\tLongest sequence: "
533 << NStr::IntToString(m_BlastDb->GetMaxLength(), kFlags) << " "
534 << kLetters << endl << endl;
535
536 out << "BLASTDB Version: " << kVersion << endl;
537
538 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION > 550)) && \
539 (!defined(NCBI_COMPILER_MIPSPRO)) )
540 // Print filtering algorithms supported
541 out << m_BlastDb->GetAvailableMaskAlgorithmDescriptions();
542 #endif
543
544 // Print volume names
545 vector<string> volumes;
546 m_BlastDb->FindVolumePaths(volumes,false);
547 out << endl << "Volumes:" << endl;
548 ITERATE(vector<string>, file_name, volumes) {
549 out << "\t" << *file_name << endl;
550 }
551 }
552
553 class CPrintTaxFields
554 {
555 private:
556 enum {
557 eTaxID,
558 eSciName,
559 eCommonName,
560 eSuperKingdom,
561 eBlastName,
562 eMaxFields
563 };
564 CNcbiOstream & m_Out;
565 vector<int> m_Fields;
566 vector<string> m_Seperators;
567 bool m_NeedTaxInfoLookup;
568 public:
CPrintTaxFields(CNcbiOstream & out,const string & fmt)569 CPrintTaxFields(CNcbiOstream & out, const string & fmt): m_Out(out), m_NeedTaxInfoLookup(true) {
570 vector<string> fields;
571 string sp = kEmptyStr;
572 if(fmt == "%f") {
573 m_Seperators.push_back(sp);
574 for(unsigned int i=eTaxID; i < eMaxFields; i++){
575 m_Fields.push_back(i);
576 m_Seperators.push_back("\t");
577 }
578 return;
579 }
580
581 for (unsigned int i = 0; i < fmt.size(); i++) {
582 if (fmt[i] == '%') {
583 if (fmt[i+1] == '%') {
584 sp += fmt[i];
585 continue;
586 }
587 i++;
588 switch (fmt[i]) {
589 case 'T' :
590 m_Fields.push_back(eTaxID);
591 break;
592 case 'S' :
593 m_Fields.push_back(eSciName);
594 break;
595 case 'L' :
596 m_Fields.push_back(eCommonName);
597 break;
598 case 'K' :
599 m_Fields.push_back(eSuperKingdom);
600 break;
601 case 'B' :
602 m_Fields.push_back(eBlastName);
603 break;
604 default:
605 sp += fmt[i-1];
606 sp += fmt[i];
607 continue;
608 break;
609 }
610 m_Seperators.push_back(sp);
611 sp = kEmptyStr;
612 }
613 else {
614 sp += fmt[i];
615 }
616 }
617 m_Seperators.push_back(sp);
618
619 if(m_Fields.empty()) {
620 NCBI_THROW(CInputException, eInvalidInput,
621 "Invalid format options for tax_info.");
622 }
623 if((m_Fields.size() == 1) && (m_Fields[0] == eTaxID)){
624 m_NeedTaxInfoLookup = false;
625 }
626 }
627
PrintEntry(const SSeqDBTaxInfo & t)628 void PrintEntry(const SSeqDBTaxInfo & t){
629 for(unsigned int i=0; i < m_Fields.size(); i++) {
630 m_Out << m_Seperators[i];
631 switch (m_Fields[i]){
632 case eTaxID:
633 m_Out << t.taxid;
634 break;
635 case eSciName:
636 m_Out << t.scientific_name;
637 break;
638 case eCommonName:
639 m_Out << t.common_name;
640 break;
641 case eSuperKingdom:
642 m_Out << t.s_kingdom;
643 break;
644 case eBlastName:
645 m_Out << t.blast_name;
646 break;
647 default:
648 NCBI_THROW(CInputException, eInvalidInput,
649 "Invalid format options for tax_info.");
650 break;
651 }
652 }
653 m_Out << m_Seperators.back();
654 m_Out << "\n";
655 }
NeedTaxNames()656 bool NeedTaxNames(){return m_NeedTaxInfoLookup;}
657 };
658
659
660 void
x_PrintBlastDatabaseTaxInformation()661 CBlastDBCmdApp::x_PrintBlastDatabaseTaxInformation()
662 {
663 _ASSERT(m_BlastDb.NotEmpty());
664 const CArgs& args = GetArgs();
665
666 CNcbiOstream& out = args[kArgOutput].AsOutputFile();
667 const string& kFmt = args["outfmt"].AsString();
668 CPrintTaxFields tf(out, kFmt);
669 set<TTaxId> tax_ids;
670 m_BlastDb->GetDBTaxIds(tax_ids);
671 // Print basic database information
672 out << "# of Tax IDs in Database: " << tax_ids.size() << endl;
673 SSeqDBTaxInfo info;
674 ITERATE(set<TTaxId>, itr, tax_ids) {
675 SSeqDBTaxInfo info;
676 if(tf.NeedTaxNames()){
677 CSeqDBTaxInfo::GetTaxNames(*itr, info);
678 if(info.taxid == ZERO_TAX_ID){
679 info.taxid = *itr;
680 info.scientific_name = NA;
681 info.common_name = NA;
682 info.blast_name = NA;
683 info.s_kingdom = NA;
684 }
685 }
686 else {
687 info.taxid = *itr;
688 }
689 tf.PrintEntry(info);
690 }
691 }
692
693
694 string
x_InitSearchRequest()695 CBlastDBCmdApp::x_InitSearchRequest()
696 {
697 const CArgs& args = GetArgs();
698 m_GetDuplicates = args["get_dups"];
699 m_TargetOnly = args["target_only"];
700
701 string outfmt = kEmptyStr;
702 if (args["outfmt"].HasValue()) {
703 outfmt = args["outfmt"].AsString();
704 m_FASTA = false;
705 m_Asn1Bioseq = false;
706
707 if ((outfmt.find("%f") != string::npos &&
708 (outfmt.find("%b") != string::npos || outfmt.find("%d") != string::npos)) ||
709 (outfmt.find("%b") != string::npos && outfmt.find("%d") != string::npos)) {
710 NCBI_THROW(CInputException, eInvalidInput,
711 "The %f, %b, %d output format options cannot be specified together.");
712 }
713
714 if (outfmt.find("%b") != string::npos) {
715 outfmt = "%b";
716 m_Asn1Bioseq = true;
717 }
718
719 // If "%f" is found within outfmt, discard everything else
720 if (outfmt.find("%f") != string::npos) {
721 outfmt = "%f";
722 m_FASTA = true;
723 }
724
725 if (outfmt.find("%d") != string::npos) {
726 outfmt = "%d";
727 }
728
729 if (outfmt.find("%m") != string::npos) {
730 int algo_id = 0;
731 size_t i = outfmt.find("%m") + 2;
732 bool found = false;
733 while (i < outfmt.size() && outfmt[i] >= '0' && outfmt[i] <= '9') {
734 algo_id = algo_id * 10 + (outfmt[i] - '0');
735 outfmt.erase(i, 1);
736 found = true;
737 }
738 if (!found) {
739 NCBI_THROW(CInputException, eInvalidInput,
740 "The option '-outfmt %m' is not followed by a masking algo ID.");
741 }
742 m_Config.m_FmtAlgoId = algo_id;
743 if(!s_IsMaskAlgoIdValid(*m_BlastDb, m_Config.m_FmtAlgoId)) {
744 NCBI_THROW(CInvalidDataException, eInvalidInput,
745 "Invalid filtering algorithm ID for outfmt %m.");
746 }
747 }
748 }
749
750 if (args["strand"].HasValue() && !m_DbIsProtein) {
751 if (args["strand"].AsString() == "plus") {
752 m_Config.m_Strand = eNa_strand_plus;
753 } else if (args["strand"].AsString() == "minus") {
754 m_Config.m_Strand = eNa_strand_minus;
755 } else {
756 NCBI_THROW(CInputException, eInvalidInput,
757 "Both strands is not supported");
758 }
759 }
760 m_Config.m_UseCtrlA = args["ctrl_a"];
761 if (args["mask_sequence_with"].HasValue()) {
762 m_Config.m_FiltAlgoId = -1;
763 m_Config.m_FiltAlgoId = NStr::StringToInt(args["mask_sequence_with"].AsString(), NStr::fConvErr_NoThrow);
764 if(errno) {
765 m_Config.m_FiltAlgoId = m_BlastDb->GetMaskAlgorithmId(args["mask_sequence_with"].AsString());
766 }
767 if(!s_IsMaskAlgoIdValid(*m_BlastDb, m_Config.m_FiltAlgoId)){
768 NCBI_THROW(CInvalidDataException, eInvalidInput,
769 "Invalid filtering algorithm ID for mask_sequence_with.");
770 }
771 }
772 if (args["range"].HasValue()) {
773 m_Config.m_SeqRange = ParseSequenceRangeOpenEnd(args["range"].AsString());
774 }
775 return outfmt;
776 }
777
778 int
x_ProcessSearchType(CBlastDB_Formatter & fmt)779 CBlastDBCmdApp::x_ProcessSearchType(CBlastDB_Formatter & fmt)
780 {
781 const CArgs& args = GetArgs();
782 if (args["entry"].HasValue() && args["entry"].AsString() == "all") {
783 fmt.DumpAll(m_Config);
784 }
785 else if (args["entry_batch"].HasValue()) {
786 if(m_GetDuplicates) {
787 return x_ProcessBatchEntry(fmt);
788 }
789 else {
790 return x_ProcessBatchEntry_NoDup(fmt);
791 }
792 }
793 else if (args["entry"].HasValue() || args["ipg"].HasValue()) {
794 return x_ProcessEntry(fmt);
795 }
796 else if (args["ipg_batch"].HasValue()) {
797 return x_ProcessBatchPig(fmt);
798 }
799 else if(args[kArgTaxIdList].HasValue()||
800 args[kArgTaxIdListFile].HasValue()) {
801 return x_ProcessTaxIdList(fmt);
802 }
803 else {
804 NCBI_THROW(CInputException, eInvalidInput,
805 "Must specify query type: one of 'entry', 'entry_batch', or 'pig'");
806 }
807 return 0;
808 }
809
x_UseLongSeqIds()810 bool CBlastDBCmdApp::x_UseLongSeqIds()
811 {
812 const CArgs& args = GetArgs();
813 if (args["long_seqids"].AsBoolean()) {
814 return true;
815 }
816 CNcbiApplication* app = CNcbiApplication::Instance();
817 if (app) {
818 const CNcbiRegistry& registry = app->GetConfig();
819 if (registry.Get("BLAST", "LONG_SEQID") == "1") {
820 return true;
821 }
822 }
823 return false;
824 }
825
826 int
x_ProcessSearchRequest()827 CBlastDBCmdApp::x_ProcessSearchRequest()
828 {
829 int err_found = 0;
830 try {
831 const CArgs& args = GetArgs();
832 CNcbiOstream& out = args[kArgOutput].AsOutputFile();
833 string outfmt = x_InitSearchRequest();
834 /* Special case: full db dump when no range and mask data is specified */
835 if (m_FASTA) {
836 CBlastDB_FastaFormatter fasta_fmt(*m_BlastDb, out, args["line_length"].AsInteger(), x_UseLongSeqIds());
837 err_found = x_ProcessSearchType(fasta_fmt);
838 }
839 else if (m_Asn1Bioseq) {
840 CBlastDB_BioseqFormatter bioseq_fmt(*m_BlastDb, out);
841 err_found = x_ProcessSearchType(bioseq_fmt);
842 }
843 else {
844 CBlastDB_SeqFormatter seq_fmt(outfmt, *m_BlastDb, out);
845 err_found = x_ProcessSearchType(seq_fmt);
846 }
847 }
848 catch (const CException& e) {
849 ERR_POST(Error << e.GetMsg());
850 err_found = 1;
851 } catch (...) {
852 ERR_POST(Error << "Failed to retrieve requested item");
853 err_found = 1;
854 }
855 return err_found;
856 }
857
858
Init()859 void CBlastDBCmdApp::Init()
860 {
861 HideStdArgs(fHideConffile | fHideFullVersion | fHideXmlHelp | fHideDryRun);
862
863 auto_ptr<CArgDescriptions> arg_desc(new CArgDescriptions);
864
865 // Specify USAGE context
866 arg_desc->SetUsageContext(GetArguments().GetProgramBasename(),
867 "BLAST database client, version " + CBlastVersion().Print());
868
869 arg_desc->SetCurrentGroup("BLAST database options");
870 arg_desc->AddDefaultKey(kArgDb, "dbname", "BLAST database name",
871 CArgDescriptions::eString, "nr");
872
873 arg_desc->AddDefaultKey(kArgDbType, "molecule_type",
874 "Molecule type stored in BLAST database",
875 CArgDescriptions::eString, "guess");
876 arg_desc->SetConstraint(kArgDbType, &(*new CArgAllow_Strings,
877 "nucl", "prot", "guess"));
878
879 arg_desc->SetCurrentGroup("Retrieval options");
880 arg_desc->AddOptionalKey("entry", "sequence_identifier",
881 "Comma-delimited search string(s) of sequence identifiers"
882 ":\n\te.g.: 555, AC147927, 'gnl|dbname|tag', or 'all' "
883 "to select all\n\tsequences in the database",
884 CArgDescriptions::eString);
885
886 arg_desc->AddOptionalKey("entry_batch", "input_file",
887 "Input file for batch processing (Format: one entry per line, seq id \n"
888 "followed by optional space-delimited specifier(s) [range|strand|mask_algo_id]",
889 CArgDescriptions::eInputFile);
890 arg_desc->SetDependency("entry_batch", CArgDescriptions::eExcludes, "entry");
891 arg_desc->SetDependency("entry_batch", CArgDescriptions::eExcludes, "range");
892 arg_desc->SetDependency("entry_batch", CArgDescriptions::eExcludes, "strand");
893 arg_desc->SetDependency("entry_batch", CArgDescriptions::eExcludes, "mask_sequence_with");
894
895 arg_desc->AddOptionalKey("ipg", "IPG", "IPG to retrieve",
896 CArgDescriptions::eInteger);
897 arg_desc->SetConstraint("ipg", new CArgAllowValuesGreaterThanOrEqual(0));
898 arg_desc->SetDependency("ipg", CArgDescriptions::eExcludes, "entry");
899 arg_desc->SetDependency("ipg", CArgDescriptions::eExcludes, "entry_batch");
900 arg_desc->SetDependency("ipg", CArgDescriptions::eExcludes, "target_only");
901 arg_desc->SetDependency("ipg", CArgDescriptions::eExcludes, "ipg_batch");
902
903 arg_desc->AddOptionalKey("ipg_batch", "input_file",
904 "Input file for batch processing (Format: one entry per line, IPG \n"
905 "followed by optional space-delimited specifier(s) [range|strand|mask_algo_id]",
906 CArgDescriptions::eInputFile);
907 arg_desc->SetDependency("ipg_batch", CArgDescriptions::eExcludes, "entry");
908 arg_desc->SetDependency("ipg_batch", CArgDescriptions::eExcludes, "entry_batch");
909 arg_desc->SetDependency("ipg_batch", CArgDescriptions::eExcludes, "range");
910 arg_desc->SetDependency("ipg_batch", CArgDescriptions::eExcludes, "strand");
911 arg_desc->SetDependency("ipg_batch", CArgDescriptions::eExcludes, "mask_sequence_with");
912
913 arg_desc->AddOptionalKey(kArgTaxIdList, "taxonomy_ids",
914 "Comma-delimited taxonomy identifiers", CArgDescriptions::eString);
915 arg_desc->SetDependency(kArgTaxIdList, CArgDescriptions::eExcludes, "entry");
916 arg_desc->SetDependency(kArgTaxIdList, CArgDescriptions::eExcludes, "entry_batch");
917 arg_desc->SetDependency(kArgTaxIdList, CArgDescriptions::eExcludes, "pig");
918
919 arg_desc->AddOptionalKey(kArgTaxIdListFile, "input_file",
920 "Input file for taxonomy identifiers", CArgDescriptions::eInputFile);
921 arg_desc->SetDependency(kArgTaxIdListFile, CArgDescriptions::eExcludes, "entry");
922 arg_desc->SetDependency(kArgTaxIdListFile, CArgDescriptions::eExcludes, "entry_batch");
923 arg_desc->SetDependency(kArgTaxIdListFile, CArgDescriptions::eExcludes, "pig");
924 arg_desc->SetDependency(kArgTaxIdListFile, CArgDescriptions::eExcludes, kArgTaxIdList);
925
926 arg_desc->AddFlag("info", "Print BLAST database information", true);
927 // All other options to this program should be here
928 const char* exclusions[] = { "entry", "entry_batch", "outfmt", "strand",
929 "target_only", "ctrl_a", "get_dups", "pig", "range",
930 "mask_sequence", "list", "remove_redundant_dbs", "recursive",
931 "list_outfmt", kArgTaxIdListFile.c_str(), kArgTaxIdList.c_str()};
932 for (size_t i = 0; i < sizeof(exclusions)/sizeof(*exclusions); i++) {
933 arg_desc->SetDependency("info", CArgDescriptions::eExcludes,
934 string(exclusions[i]));
935 }
936
937 arg_desc->AddFlag("tax_info",
938 "Print taxonomic information contained in this BLAST database.\n"
939 "Use -outfmt to customize output. Format specifiers supported are:\n"
940 "\t\t%T means taxid\n"
941 "\t\t%L means common taxonomic name\n"
942 "\t\t%S means scientific name\n"
943 "\t\t%K means taxonomic super kingdom\n"
944 "\t\t%B means BLAST name\n"
945 "By default it prints: '%T %S %L %K %B'\n", true);
946 // All other options to this program should be here
947 const char* tax_info_exclusions[] = { "info", "entry", "entry_batch", "strand",
948 "target_only", "ctrl_a", "get_dups", "pig", "range",
949 "mask_sequence", "list", "remove_redundant_dbs", "recursive",
950 "list_outfmt", kArgTaxIdListFile.c_str(), kArgTaxIdList.c_str() };
951 for (size_t i = 0; i < sizeof(tax_info_exclusions)/sizeof(*tax_info_exclusions); i++) {
952 arg_desc->SetDependency("tax_info", CArgDescriptions::eExcludes,
953 string(tax_info_exclusions[i]));
954 }
955
956 arg_desc->SetCurrentGroup("Sequence retrieval configuration options");
957 arg_desc->AddOptionalKey("range", "numbers",
958 "Range of sequence to extract in 1-based offsets "
959 "(Format: start-stop, for start to end of sequence use start - )",
960 CArgDescriptions::eString);
961
962 arg_desc->AddDefaultKey("strand", "strand",
963 "Strand of nucleotide sequence to extract",
964 CArgDescriptions::eString, "plus");
965 arg_desc->SetConstraint("strand", &(*new CArgAllow_Strings, "minus",
966 "plus"));
967
968 arg_desc->AddOptionalKey("mask_sequence_with", "mask_algo_id",
969 "Produce lower-case masked FASTA using the "
970 "algorithm ID specified",
971 CArgDescriptions::eString);
972
973 arg_desc->SetCurrentGroup("Output configuration options");
974 arg_desc->AddDefaultKey(kArgOutput, "output_file", "Output file name",
975 CArgDescriptions::eOutputFile, "-");
976
977 // The format specifiers below should be handled in
978 // CSeqFormatter::x_Builder
979 arg_desc->AddDefaultKey("outfmt", "format",
980 "Output format, where the available format specifiers are:\n"
981 "\t\t%f means sequence in FASTA format\n"
982 "\t\t%s means sequence data (without defline)\n"
983 "\t\t%a means accession\n"
984 "\t\t%g means gi\n"
985 "\t\t%o means ordinal id (OID)\n"
986 "\t\t%i means sequence id\n"
987 "\t\t%t means sequence title\n"
988 "\t\t%l means sequence length\n"
989 "\t\t%h means sequence hash value\n"
990 "\t\t%T means taxid\n"
991 "\t\t%X means leaf-node taxids\n"
992 "\t\t%e means membership integer\n"
993 "\t\t%L means common taxonomic name\n"
994 "\t\t%C means common taxonomic names for leaf-node taxids\n"
995 "\t\t%S means scientific name\n"
996 "\t\t%N means scientific names for leaf-node taxids\n"
997 "\t\t%B means BLAST name\n" /* Is this useful outside NCBI? */
998 #if _DEBUG
999 "\t\t%n means a list of links integers separated by ';'\n"
1000 #endif /* _DEBUG */
1001 "\t\t%K means taxonomic super kingdom\n"
1002 "\t\t%P means PIG\n"
1003 #if _DEBUG
1004 "\t\t%d means defline in text ASN.1 format\n"
1005 "\t\t%b means Bioseq in text ASN.1 format\n"
1006 #endif /* _DEBUG */
1007 "\t\t%m means sequence masking data.\n"
1008 "\t\t Masking data will be displayed as a series of 'N-M' values\n"
1009 "\t\t separated by ';' or the word 'none' if none are available.\n"
1010 #if _DEBUG
1011 "\tIf '%f' or '%d' are specified, all other format specifiers are ignored.\n"
1012 "\tFor every format except '%f' and '%d', each line of output will "
1013 #else
1014 "\tIf '%f' is specified, all other format specifiers are ignored.\n"
1015 "\tFor every format except '%f', each line of output will "
1016 #endif /* _DEBUG */
1017 "correspond\n\tto a sequence.\n",
1018 CArgDescriptions::eString, "%f");
1019
1020 //arg_desc->AddDefaultKey("target_only", "value",
1021 // "Definition line should contain target gi only",
1022 // CArgDescriptions::eBoolean, "false");
1023 arg_desc->AddFlag("target_only",
1024 "Definition line should contain target entry only", true);
1025
1026 //arg_desc->AddDefaultKey("get_dups", "value",
1027 // "Retrieve duplicate accessions",
1028 // CArgDescriptions::eBoolean, "false");
1029 arg_desc->AddFlag("get_dups", "Retrieve duplicate accessions", true);
1030 arg_desc->SetDependency("get_dups", CArgDescriptions::eExcludes,
1031 "target_only");
1032
1033 arg_desc->SetCurrentGroup("Output configuration options for FASTA format");
1034 arg_desc->AddDefaultKey("line_length", "number", "Line length for output",
1035 CArgDescriptions::eInteger,
1036 NStr::IntToString(80));
1037 arg_desc->SetConstraint("line_length",
1038 new CArgAllowValuesGreaterThanOrEqual(1));
1039
1040 arg_desc->AddFlag("ctrl_a",
1041 "Use Ctrl-A as the non-redundant defline separator",true);
1042
1043 const char* exclusions_discovery[] = { "entry", "entry_batch", "outfmt",
1044 "strand", "target_only", "ctrl_a", "get_dups", "pig", "range", kArgDb.c_str(),
1045 "info", "mask_sequence", "line_length" };
1046 arg_desc->SetCurrentGroup("BLAST database configuration and discovery options");
1047 arg_desc->AddFlag("show_blastdb_search_path",
1048 "Displays the default BLAST database search paths", true);
1049 arg_desc->AddOptionalKey("list", "directory",
1050 "List BLAST databases in the specified directory",
1051 CArgDescriptions::eString);
1052 arg_desc->AddFlag("remove_redundant_dbs",
1053 "Remove the databases that are referenced by another "
1054 "alias file in the directory in question", true);
1055 arg_desc->AddFlag("recursive",
1056 "Recursively traverse the directory structure to list "
1057 "available BLAST databases", true);
1058 arg_desc->AddDefaultKey("list_outfmt", "format",
1059 "Output format for the list option, where the available format specifiers are:\n"
1060 "\t\t%f means the BLAST database absolute file name path\n"
1061 "\t\t%p means the BLAST database molecule type\n"
1062 "\t\t%t means the BLAST database title\n"
1063 "\t\t%d means the date of last update of the BLAST database\n"
1064 "\t\t%l means the number of bases/residues in the BLAST database\n"
1065 "\t\t%n means the number of sequences in the BLAST database\n"
1066 "\t\t%U means the number of bytes used by the BLAST database\n"
1067 "\t\t%v means the BLAST database format version\n"
1068 "\tFor every format each line of output will "
1069 "correspond to a BLAST database.\n",
1070 CArgDescriptions::eString, "%f %p");
1071 for (size_t i = 0; i <
1072 sizeof(exclusions_discovery)/sizeof(*exclusions_discovery); i++) {
1073 arg_desc->SetDependency("list", CArgDescriptions::eExcludes,
1074 string(exclusions_discovery[i]));
1075 arg_desc->SetDependency("recursive", CArgDescriptions::eExcludes,
1076 string(exclusions_discovery[i]));
1077 arg_desc->SetDependency("remove_redundant_dbs", CArgDescriptions::eExcludes,
1078 string(exclusions_discovery[i]));
1079 arg_desc->SetDependency("list_outfmt", CArgDescriptions::eExcludes,
1080 string(exclusions_discovery[i]));
1081 arg_desc->SetDependency("show_blastdb_search_path", CArgDescriptions::eExcludes,
1082 string(exclusions_discovery[i]));
1083 }
1084 arg_desc->SetDependency("show_blastdb_search_path", CArgDescriptions::eExcludes,
1085 "list");
1086 arg_desc->SetDependency("show_blastdb_search_path", CArgDescriptions::eExcludes,
1087 "recursive");
1088 arg_desc->SetDependency("show_blastdb_search_path", CArgDescriptions::eExcludes,
1089 "list_outfmt");
1090 arg_desc->SetDependency("show_blastdb_search_path", CArgDescriptions::eExcludes,
1091 "remove_redundant_dbs");
1092
1093 arg_desc->AddFlag("exact_length", "Get exact length for db info", true);
1094 arg_desc->SetDependency("exact_length", CArgDescriptions::eRequires,
1095 "info");
1096 arg_desc->AddFlag("long_seqids", "Use long seq id for fasta deflines", true);
1097 arg_desc->SetDependency("long_seqids", CArgDescriptions::eExcludes, "info");
1098 SetupArgDescriptions(arg_desc.release());
1099 }
1100
Run(void)1101 int CBlastDBCmdApp::Run(void)
1102 {
1103 int status = 0;
1104 const CArgs& args = GetArgs();
1105
1106 // Silences warning in CSeq_id for CSeq_id::fParse_PartialOK
1107 SetDiagFilter(eDiagFilter_Post, "!(1306.10)");
1108 SetDiagPostLevel(eDiag_Warning);
1109 SetDiagPostPrefix("blastdbcmd");
1110
1111 try {
1112 CNcbiOstream& out = args["out"].AsOutputFile();
1113 if (args["show_blastdb_search_path"]) {
1114 out << CSeqDB::GenerateSearchPath() << NcbiEndl;
1115 return status;
1116 } else if (args["list"]) {
1117 const string& blastdb_dir = args["list"].AsString();
1118 const bool recurse = args["recursive"];
1119 const bool remove_redundant_dbs = args["remove_redundant_dbs"];
1120 const string dbtype = args[kArgDbType]
1121 ? args[kArgDbType].AsString()
1122 : "guess";
1123 const string& kOutFmt = args["list_outfmt"].AsString();
1124 const vector<SSeqDBInitInfo> dbs =
1125 FindBlastDBs(blastdb_dir, dbtype, recurse, true,
1126 remove_redundant_dbs);
1127 CBlastDbFormatter blastdb_fmt(kOutFmt);
1128 ITERATE(vector<SSeqDBInitInfo>, db, dbs) {
1129 out << blastdb_fmt.Write(*db) << NcbiEndl;
1130 }
1131 return status;
1132 }
1133
1134 if (args["info"]) {
1135 x_InitBlastDB();
1136 x_PrintBlastDatabaseInformation();
1137 }
1138 else if (args["tax_info"]) {
1139 x_InitBlastDB();
1140 x_PrintBlastDatabaseTaxInformation();
1141 }
1142 else if(args[kArgTaxIdList].HasValue() ||
1143 args[kArgTaxIdListFile].HasValue()) {
1144 x_InitBlastDB_TaxIdList();
1145 status = x_ProcessSearchRequest();
1146 }
1147 else {
1148 x_InitBlastDB();
1149 status = x_ProcessSearchRequest();
1150 }
1151 x_AddCmdOptions();
1152
1153 } CATCH_ALL(status)
1154
1155 m_UsageReport.AddParam(CBlastUsageReport::eExitStatus, status);
1156 return status;
1157 }
1158
x_AddCmdOptions()1159 void CBlastDBCmdApp::x_AddCmdOptions()
1160 {
1161 const CArgs & args = GetArgs();
1162 if (args["info"]) {
1163 m_UsageReport.AddParam(CBlastUsageReport::eDBInfo, true);
1164 }
1165 else if (args["tax_info"]) {
1166 m_UsageReport.AddParam(CBlastUsageReport::eDBTaxInfo, true);
1167 }
1168 else if(args[kArgTaxIdList].HasValue() || args[kArgTaxIdListFile].HasValue()) {
1169 m_UsageReport.AddParam(CBlastUsageReport::eTaxIdList, true);
1170 }
1171 else if(args["ipg"].HasValue() || args["ipg_batch"].HasValue()) {
1172 m_UsageReport.AddParam(CBlastUsageReport::eIPGList, true);
1173 }
1174 else if(args["entry"].HasValue() || args["entry_batch"].HasValue()) {
1175 m_UsageReport.AddParam(CBlastUsageReport::eDBEntry, true);
1176 if (args["entry"].HasValue() && args["entry"].AsString() == "all") {
1177 m_UsageReport.AddParam(CBlastUsageReport::eDBDumpAll, true);
1178 }
1179 else {
1180 m_UsageReport.AddParam(CBlastUsageReport::eDBEntry, true);
1181 }
1182 }
1183 if(args["outfmt"].HasValue()) {
1184 m_UsageReport.AddParam(CBlastUsageReport::eOutputFmt, args["outfmt"].AsString());
1185 }
1186
1187
1188 string db_name = m_BlastDb->GetDBNameList();
1189 int off = db_name.find_last_of(CFile::GetPathSeparator());
1190 if (off != -1) {
1191 db_name.erase(0, off+1);
1192 }
1193 m_UsageReport.AddParam(CBlastUsageReport::eDBName, db_name);
1194 m_UsageReport.AddParam(CBlastUsageReport::eDBLength, (Int8) m_BlastDb->GetTotalLength());
1195 m_UsageReport.AddParam(CBlastUsageReport::eDBNumSeqs, m_BlastDb->GetNumSeqs());
1196 m_UsageReport.AddParam(CBlastUsageReport::eDBDate, m_BlastDb->GetDate());
1197 }
1198
1199
1200
1201 #ifndef SKIP_DOXYGEN_PROCESSING
main(int argc,const char * argv[])1202 int main(int argc, const char* argv[] /*, const char* envp[]*/)
1203 {
1204 return CBlastDBCmdApp().AppMain(argc, argv);
1205 }
1206 #endif /* SKIP_DOXYGEN_PROCESSING */
1207