1 /* $Id: csra_test_mt.cpp 497463 2016-04-06 19:10:32Z vasilche $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Authors: Eugene Vasilchenko
27 *
28 * File Description:
29 * Sample test application for cSRA reader
30 *
31 */
32
33 #include <ncbi_pch.hpp>
34 #include <corelib/ncbiapp.hpp>
35 #include <corelib/ncbifile.hpp>
36 #include <corelib/ncbi_system.hpp>
37 #include <corelib/test_mt.hpp>
38 #include <util/random_gen.hpp>
39 #include <sra/readers/sra/csraread.hpp>
40 #include <sra/readers/ncbi_traces_path.hpp>
41
42 #include <objects/general/general__.hpp>
43 #include <objects/seq/seq__.hpp>
44 #include <objects/seqset/seqset__.hpp>
45 #include <objects/seqalign/seqalign__.hpp>
46 #include <objects/seqres/seqres__.hpp>
47
48 #include <serial/serial.hpp>
49 #include <serial/iterator.hpp>
50 #include <serial/objostrasnb.hpp>
51 #include <serial/objistrasnb.hpp>
52
53 #include <common/test_assert.h> /* This header must go last */
54
55 USING_NCBI_SCOPE;
56 USING_SCOPE(objects);
57
58 /////////////////////////////////////////////////////////////////////////////
59 // CCSRATestApp::
60
61
62 class CCSRATestApp : public CThreadedApp
63 {
64 private:
65 virtual bool Thread_Run(int idx);
66 virtual bool TestApp_Init(void);
67 virtual bool TestApp_Exit(void);
68 virtual bool TestApp_Args(CArgDescriptions& args);
69
70 bool m_Verbose;
71 int m_Seed;
72 int m_IterCount, m_IterSize;
73 int m_ErrorCount;
74 vector<string> m_Accession;
75 vector<TVDBRowId> m_MaxSpotId;
76
77 CVDBMgr m_Mgr;
78 };
79
80
81 /////////////////////////////////////////////////////////////////////////////
82 // Init test
TestApp_Args(CArgDescriptions & args)83 bool CCSRATestApp::TestApp_Args(CArgDescriptions& args)
84 {
85 // Specify USAGE context
86 args.SetUsageContext(GetArguments().GetProgramBasename(),
87 "csra_test_mt");
88
89 args.AddDefaultKey("accs", "Accessions",
90 "comma separated SRA accession list",
91 CArgDescriptions::eString,
92 "SRR000010,SRR389414,SRR494733,SRR505887,SRR035417");
93 args.AddDefaultKey("iter_count", "IterationCount",
94 "Number of read iterations",
95 CArgDescriptions::eInteger,
96 "10");
97 args.AddDefaultKey("iter_size", "IterationSize",
98 "Number of sequential sequences in one iteration",
99 CArgDescriptions::eInteger,
100 "10");
101 args.AddFlag("verbose", "Print info about progress");
102
103 return true;
104 }
105
106
TestApp_Init(void)107 bool CCSRATestApp::TestApp_Init(void)
108 {
109 SetDiagPostLevel(eDiag_Info);
110 const CArgs& args = GetArgs();
111 m_Verbose = args["verbose"];
112 m_ErrorCount = 0;
113 m_Seed = args["seed"]? args["seed"].AsInteger(): int(time(0));
114 if ( m_Verbose ) {
115 LOG_POST(Info<<"Seed: "<<m_Seed);
116 }
117 NStr::Split(args["accs"].AsString(), ",", m_Accession);
118 if ( m_Accession.empty() ) {
119 ERR_POST(Fatal<<"empty accession list");
120 }
121 m_IterCount = args["iter_count"].AsInteger();
122 m_IterSize = args["iter_size"].AsInteger();
123 m_MaxSpotId.assign(m_Accession.size(), 0);
124 return true;
125 }
126
127
TestApp_Exit(void)128 bool CCSRATestApp::TestApp_Exit(void)
129 {
130 if ( m_ErrorCount ) {
131 ERR_POST("Errors found: "<<m_ErrorCount);
132 }
133 else {
134 LOG_POST("Done.");
135 }
136 return !m_ErrorCount;
137 }
138
139 /////////////////////////////////////////////////////////////////////////////
140 // Run test
141 /////////////////////////////////////////////////////////////////////////////
142
s_Check(const CBioseq & seq)143 void s_Check(const CBioseq& seq)
144 {
145 _ASSERT(!seq.GetId().empty());
146 const CSeq_inst& inst = seq.GetInst();
147 const string& seqdata = inst.GetSeq_data().GetIupacna().Get();
148 _ASSERT(seqdata.size() == inst.GetLength());
149 ITERATE ( string, i, seqdata ) {
150 _ASSERT(*i >= 'A' && *i <= 'Z');
151 }
152 }
153
s_AsFASTA(const CBioseq & seq)154 string s_AsFASTA(const CBioseq& seq)
155 {
156 const CSeq_inst& inst = seq.GetInst();
157 const string& seqdata = inst.GetSeq_data().GetIupacna().Get();
158 return seq.GetId().front()->AsFastaString()+' '+seqdata;
159 }
160
Thread_Run(int idx)161 bool CCSRATestApp::Thread_Run(int idx)
162 {
163 CRandom random(m_Seed+idx);
164 _ASSERT(!m_Accession.empty());
165 _ASSERT(m_IterCount);
166 _ASSERT(m_IterSize);
167 for ( int ti = 0; ti < m_IterCount; ++ti ) {
168 size_t index = random.GetRandIndexSize_t(m_Accession.size());
169 const string& acc = m_Accession[index];
170 if ( m_Verbose ) {
171 LOG_POST(Info<<"T"<<idx<<"."<<ti<<": acc["<<index<<"] "<<acc);
172 }
173 CCSraDb csra(m_Mgr, acc);
174 if ( !m_MaxSpotId[index] ) {
175 DEFINE_STATIC_FAST_MUTEX(s_mutex);
176 CFastMutexGuard guard(s_mutex);
177 m_MaxSpotId[index] = CCSraShortReadIterator(csra).GetMaxSpotId();
178 if ( m_Verbose ) {
179 LOG_POST(Info<<"T"<<idx<<"."<<ti<<": acc["<<index<<"] "<<acc
180 <<": max id = " << m_MaxSpotId[index]);
181 }
182 _ASSERT(m_MaxSpotId[index] > 0);
183 }
184 TVDBRowId count = min(m_MaxSpotId[index], TVDBRowId(m_IterSize));
185 if ( count <= 0 ) {
186 continue;
187 }
188 TVDBRowId start_id = random.GetRandUint8(1, m_MaxSpotId[index]-count);
189 TVDBRowId stop_id = start_id+count;
190 if ( m_Verbose ) {
191 LOG_POST(Info<<"T"<<idx<<"."<<ti<<": acc["<<index<<"] "<<acc
192 <<": scan " << start_id<<" - "<<(stop_id-1));
193 }
194 size_t seq_count = 0;
195 for ( CCSraShortReadIterator i(csra, start_id);
196 i && i.GetSpotId() < stop_id; ++i ) {
197 CRef<CBioseq> seq = i.GetShortBioseq();
198 s_Check(*seq);
199 ++seq_count;
200 if ( true ) {
201 if ( m_Verbose ) {
202 LOG_POST(Info<<"T"<<idx<<"."<<ti<<": acc["<<index<<"] "<<acc
203 <<": "<<s_AsFASTA(*seq));
204 }
205 }
206 else {
207 if ( m_Verbose ) {
208 LOG_POST(Info<<"T"<<idx<<"."<<ti<<": acc["<<index<<"] "<<acc
209 <<": "<<i.GetShortSeq_id()->AsFastaString());
210 }
211 }
212 }
213 _ASSERT(seq_count);
214 }
215 return true;
216 }
217
218
219 /////////////////////////////////////////////////////////////////////////////
220 // Cleanup
221
222
223 /////////////////////////////////////////////////////////////////////////////
224 // MAIN
225
226
main(int argc,const char * argv[])227 int main(int argc, const char* argv[])
228 {
229 // Execute main application function
230 return CCSRATestApp().AppMain(argc, argv);
231 }
232