1 /*===========================================================================
2 *
3 * PUBLIC DOMAIN NOTICE
4 * National Center for Biotechnology Information
5 *
6 * This software/database is a "United States Government Work" under the
7 * terms of the United States Copyright Act. It was written as part of
8 * the author's official duties as a United States Government employee and
9 * thus cannot be copyrighted. This software/database is freely available
10 * to the public for use. The National Library of Medicine and the U.S.
11 * Government have not placed any restriction on its use or reproduction.
12 *
13 * Although all reasonable efforts have been taken to ensure the accuracy
14 * and reliability of the software and data, the NLM and the U.S.
15 * Government do not and cannot warrant the performance or results that
16 * may be obtained by using this software or data. The NLM and the U.S.
17 * Government disclaim all warranties, express or implied, including
18 * warranties of performance, merchantability or fitness for any particular
19 * purpose.
20 *
21 * Please cite the author in any work or product based on this material.
22 *
23 * ===========================================================================
24 *
25 */
26
27 #include <ncbi-vdb/NGS.hpp>
28 #include <ngs/ErrorMsg.hpp>
29 #include <ngs/ReadCollection.hpp>
30 #include <ngs/ReadIterator.hpp>
31 #include <ngs/Read.hpp>
32
33
34 #include <math.h>
35 #include <iostream>
36
37 using namespace ngs;
38 using namespace std;
39
40 class DumpReferenceFASTA
41 {
42 public:
43
process(const Reference & ref)44 static void process ( const Reference & ref )
45 {
46 uint64_t len = ref . getLength ();
47
48 size_t line = 0;
49
50 cout << '>' << ref . getCanonicalName () << '\n';
51
52 try
53 {
54 for ( uint64_t offset = 0; offset < len; offset += 5000 )
55 {
56 StringRef chunk = ref . getReferenceChunk ( offset, 5000 );
57 size_t chunk_len = chunk . size ();
58 for ( size_t chunk_idx = 0; chunk_idx < chunk_len; )
59 {
60 StringRef chunk_line = chunk . substr ( chunk_idx, 70 - line );
61 line += chunk_line . size ();
62 chunk_idx += chunk_line . size ();
63
64 cout << chunk_line;
65 if ( line >= 70 )
66 {
67 cout << '\n';
68 line = 0;
69 }
70 }
71 }
72 if (line != 0)
73 cout << '\n';
74 }
75 catch ( ErrorMsg x )
76 {
77 }
78 }
79
run(const String & acc,const String & reference)80 static void run ( const String & acc, const String & reference )
81 {
82
83 // open requested accession using SRA implementation of the API
84 ReadCollection run = ncbi::NGS::openReadCollection ( acc );
85 Reference ref = run . getReference ( reference );
86 process ( ref );
87 }
88
run(const String & acc)89 static void run ( const String & acc )
90 {
91
92 // open requested accession using SRA implementation of the API
93 ReadCollection run = ncbi::NGS::openReadCollection ( acc );
94 ReferenceIterator refs = run . getReferences ();
95 while ( refs . nextReference () )
96 {
97 process ( refs );
98 }
99 }
100 };
101
main(int argc,char const * argv[])102 int main (int argc, char const *argv[])
103 {
104 if ( argc < 2 || argc > 3)
105 {
106 cerr << "Usage: DumpReferenceFASTA accession [ reference ]\n";
107 }
108 else try
109 {
110 ncbi::NGS::setAppVersionString ( "DumpReferenceFASTA.1.0.0" );
111 if ( argc == 3 )
112 DumpReferenceFASTA::run ( argv[1], argv[2] );
113 else
114 DumpReferenceFASTA::run ( argv[1] );
115 return 0;
116 }
117 catch ( ErrorMsg & x )
118 {
119 cerr << x.toString () << '\n';
120 }
121 catch ( exception & x )
122 {
123 cerr << x.what () << '\n';
124 }
125 catch ( ... )
126 {
127 cerr << "unknown exception\n";
128 }
129
130 return 10;
131 }
132