1 /*  $Id: csra_test_mt.cpp 497463 2016-04-06 19:10:32Z vasilche $
2  * ===========================================================================
3  *
4  *                            PUBLIC DOMAIN NOTICE
5  *               National Center for Biotechnology Information
6  *
7  *  This software/database is a "United States Government Work" under the
8  *  terms of the United States Copyright Act.  It was written as part of
9  *  the author's official duties as a United States Government employee and
10  *  thus cannot be copyrighted.  This software/database is freely available
11  *  to the public for use. The National Library of Medicine and the U.S.
12  *  Government have not placed any restriction on its use or reproduction.
13  *
14  *  Although all reasonable efforts have been taken to ensure the accuracy
15  *  and reliability of the software and data, the NLM and the U.S.
16  *  Government do not and cannot warrant the performance or results that
17  *  may be obtained by using this software or data. The NLM and the U.S.
18  *  Government disclaim all warranties, express or implied, including
19  *  warranties of performance, merchantability or fitness for any particular
20  *  purpose.
21  *
22  *  Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors:  Eugene Vasilchenko
27  *
28  * File Description:
29  *   Sample test application for cSRA reader
30  *
31  */
32 
33 #include <ncbi_pch.hpp>
34 #include <corelib/ncbiapp.hpp>
35 #include <corelib/ncbifile.hpp>
36 #include <corelib/ncbi_system.hpp>
37 #include <corelib/test_mt.hpp>
38 #include <util/random_gen.hpp>
39 #include <sra/readers/sra/csraread.hpp>
40 #include <sra/readers/ncbi_traces_path.hpp>
41 
42 #include <objects/general/general__.hpp>
43 #include <objects/seq/seq__.hpp>
44 #include <objects/seqset/seqset__.hpp>
45 #include <objects/seqalign/seqalign__.hpp>
46 #include <objects/seqres/seqres__.hpp>
47 
48 #include <serial/serial.hpp>
49 #include <serial/iterator.hpp>
50 #include <serial/objostrasnb.hpp>
51 #include <serial/objistrasnb.hpp>
52 
53 #include <common/test_assert.h>  /* This header must go last */
54 
55 USING_NCBI_SCOPE;
56 USING_SCOPE(objects);
57 
58 /////////////////////////////////////////////////////////////////////////////
59 //  CCSRATestApp::
60 
61 
62 class CCSRATestApp : public CThreadedApp
63 {
64 private:
65     virtual bool Thread_Run(int idx);
66     virtual bool TestApp_Init(void);
67     virtual bool TestApp_Exit(void);
68     virtual bool TestApp_Args(CArgDescriptions& args);
69 
70     bool m_Verbose;
71     int m_Seed;
72     int m_IterCount, m_IterSize;
73     int m_ErrorCount;
74     vector<string> m_Accession;
75     vector<TVDBRowId> m_MaxSpotId;
76 
77     CVDBMgr m_Mgr;
78 };
79 
80 
81 /////////////////////////////////////////////////////////////////////////////
82 //  Init test
TestApp_Args(CArgDescriptions & args)83 bool CCSRATestApp::TestApp_Args(CArgDescriptions& args)
84 {
85     // Specify USAGE context
86     args.SetUsageContext(GetArguments().GetProgramBasename(),
87                          "csra_test_mt");
88 
89     args.AddDefaultKey("accs", "Accessions",
90                        "comma separated SRA accession list",
91                        CArgDescriptions::eString,
92                        "SRR000010,SRR389414,SRR494733,SRR505887,SRR035417");
93     args.AddDefaultKey("iter_count", "IterationCount",
94                        "Number of read iterations",
95                        CArgDescriptions::eInteger,
96                        "10");
97     args.AddDefaultKey("iter_size", "IterationSize",
98                        "Number of sequential sequences in one iteration",
99                        CArgDescriptions::eInteger,
100                        "10");
101     args.AddFlag("verbose", "Print info about progress");
102 
103     return true;
104 }
105 
106 
TestApp_Init(void)107 bool CCSRATestApp::TestApp_Init(void)
108 {
109     SetDiagPostLevel(eDiag_Info);
110     const CArgs& args = GetArgs();
111     m_Verbose = args["verbose"];
112     m_ErrorCount = 0;
113     m_Seed = args["seed"]? args["seed"].AsInteger(): int(time(0));
114     if ( m_Verbose ) {
115         LOG_POST(Info<<"Seed: "<<m_Seed);
116     }
117     NStr::Split(args["accs"].AsString(), ",", m_Accession);
118     if ( m_Accession.empty() ) {
119         ERR_POST(Fatal<<"empty accession list");
120     }
121     m_IterCount = args["iter_count"].AsInteger();
122     m_IterSize = args["iter_size"].AsInteger();
123     m_MaxSpotId.assign(m_Accession.size(), 0);
124     return true;
125 }
126 
127 
TestApp_Exit(void)128 bool CCSRATestApp::TestApp_Exit(void)
129 {
130     if ( m_ErrorCount ) {
131         ERR_POST("Errors found: "<<m_ErrorCount);
132     }
133     else {
134         LOG_POST("Done.");
135     }
136     return !m_ErrorCount;
137 }
138 
139 /////////////////////////////////////////////////////////////////////////////
140 //  Run test
141 /////////////////////////////////////////////////////////////////////////////
142 
s_Check(const CBioseq & seq)143 void s_Check(const CBioseq& seq)
144 {
145     _ASSERT(!seq.GetId().empty());
146     const CSeq_inst& inst = seq.GetInst();
147     const string& seqdata = inst.GetSeq_data().GetIupacna().Get();
148     _ASSERT(seqdata.size() == inst.GetLength());
149     ITERATE ( string, i, seqdata ) {
150         _ASSERT(*i >= 'A' && *i <= 'Z');
151     }
152 }
153 
s_AsFASTA(const CBioseq & seq)154 string s_AsFASTA(const CBioseq& seq)
155 {
156     const CSeq_inst& inst = seq.GetInst();
157     const string& seqdata = inst.GetSeq_data().GetIupacna().Get();
158     return seq.GetId().front()->AsFastaString()+' '+seqdata;
159 }
160 
Thread_Run(int idx)161 bool CCSRATestApp::Thread_Run(int idx)
162 {
163     CRandom random(m_Seed+idx);
164     _ASSERT(!m_Accession.empty());
165     _ASSERT(m_IterCount);
166     _ASSERT(m_IterSize);
167     for ( int ti = 0; ti < m_IterCount; ++ti ) {
168         size_t index = random.GetRandIndexSize_t(m_Accession.size());
169         const string& acc = m_Accession[index];
170         if ( m_Verbose ) {
171             LOG_POST(Info<<"T"<<idx<<"."<<ti<<": acc["<<index<<"] "<<acc);
172         }
173         CCSraDb csra(m_Mgr, acc);
174         if ( !m_MaxSpotId[index] ) {
175             DEFINE_STATIC_FAST_MUTEX(s_mutex);
176             CFastMutexGuard guard(s_mutex);
177             m_MaxSpotId[index] = CCSraShortReadIterator(csra).GetMaxSpotId();
178             if ( m_Verbose ) {
179                 LOG_POST(Info<<"T"<<idx<<"."<<ti<<": acc["<<index<<"] "<<acc
180                          <<": max id = " << m_MaxSpotId[index]);
181             }
182             _ASSERT(m_MaxSpotId[index] > 0);
183         }
184         TVDBRowId count = min(m_MaxSpotId[index], TVDBRowId(m_IterSize));
185         if ( count <= 0 ) {
186             continue;
187         }
188         TVDBRowId start_id = random.GetRandUint8(1, m_MaxSpotId[index]-count);
189         TVDBRowId stop_id = start_id+count;
190         if ( m_Verbose ) {
191             LOG_POST(Info<<"T"<<idx<<"."<<ti<<": acc["<<index<<"] "<<acc
192                      <<": scan " << start_id<<" - "<<(stop_id-1));
193         }
194         size_t seq_count = 0;
195         for ( CCSraShortReadIterator i(csra, start_id);
196               i && i.GetSpotId() < stop_id; ++i ) {
197             CRef<CBioseq> seq = i.GetShortBioseq();
198             s_Check(*seq);
199             ++seq_count;
200             if ( true ) {
201                 if ( m_Verbose ) {
202                     LOG_POST(Info<<"T"<<idx<<"."<<ti<<": acc["<<index<<"] "<<acc
203                              <<": "<<s_AsFASTA(*seq));
204                 }
205             }
206             else {
207                 if ( m_Verbose ) {
208                     LOG_POST(Info<<"T"<<idx<<"."<<ti<<": acc["<<index<<"] "<<acc
209                              <<": "<<i.GetShortSeq_id()->AsFastaString());
210                 }
211             }
212         }
213         _ASSERT(seq_count);
214     }
215     return true;
216 }
217 
218 
219 /////////////////////////////////////////////////////////////////////////////
220 //  Cleanup
221 
222 
223 /////////////////////////////////////////////////////////////////////////////
224 //  MAIN
225 
226 
main(int argc,const char * argv[])227 int main(int argc, const char* argv[])
228 {
229     // Execute main application function
230     return CCSRATestApp().AppMain(argc, argv);
231 }
232