1 /*  $Id: id_unit_test_bad.cpp 608765 2020-05-20 19:11:58Z vasilche $
2 * ===========================================================================
3 *
4 *                            PUBLIC DOMAIN NOTICE
5 *               National Center for Biotechnology Information
6 *
7 *  This software/database is a "United States Government Work" under the
8 *  terms of the United States Copyright Act.  It was written as part of
9 *  the author's official duties as a United States Government employee and
10 *  thus cannot be copyrighted.  This software/database is freely available
11 *  to the public for use. The National Library of Medicine and the U.S.
12 *  Government have not placed any restriction on its use or reproduction.
13 *
14 *  Although all reasonable efforts have been taken to ensure the accuracy
15 *  and reliability of the software and data, the NLM and the U.S.
16 *  Government do not and cannot warrant the performance or results that
17 *  may be obtained by using this software or data. The NLM and the U.S.
18 *  Government disclaim all warranties, express or implied, including
19 *  warranties of performance, merchantability or fitness for any particular
20 *  purpose.
21 *
22 *  Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Authors:  Eugene Vasilchenko
27 *
28 * File Description:
29 *   Unit test for data loading from ID.
30 */
31 
32 #define NCBI_TEST_APPLICATION
33 
34 #include <ncbi_pch.hpp>
35 #include <objmgr/scope.hpp>
36 #include <objmgr/bioseq_handle.hpp>
37 #include <objmgr/seq_vector.hpp>
38 #include <objmgr/seqdesc_ci.hpp>
39 #include <objmgr/feat_ci.hpp>
40 #include <objmgr/align_ci.hpp>
41 #include <objmgr/graph_ci.hpp>
42 #include <objmgr/annot_ci.hpp>
43 #include <objtools/data_loaders/genbank/gbloader.hpp>
44 #include <objtools/data_loaders/genbank/readers.hpp>
45 
46 #include <corelib/ncbi_system.hpp>
47 #include <dbapi/driver/drivers.hpp>
48 #include <connect/ncbi_core_cxx.hpp>
49 #include <connect/ncbi_util.h>
50 #include <algorithm>
51 #include <numeric>
52 
53 #include <objects/general/general__.hpp>
54 #include <objects/seqfeat/seqfeat__.hpp>
55 #include <serial/iterator.hpp>
56 #include <util/random_gen.hpp>
57 
58 #include <corelib/test_boost.hpp>
59 
60 USING_NCBI_SCOPE;
61 USING_SCOPE(objects);
62 
63 
// Boolean parameter TEST/TRACE with default value false.
// s_IsTraceEnabled() reads it to decide whether TEST_TRACE_POST() produces
// any output.
// NOTE(review): TEST_TRACE, the last macro argument, is presumably the name
// of the environment variable that overrides the value - confirm against the
// NCBI_PARAM_DEF_EX documentation.
NCBI_PARAM_DECL(bool, TEST, TRACE);
NCBI_PARAM_DEF_EX(bool, TEST, TRACE, false,
                  eParam_NoThread, TEST_TRACE);
67 
68 
69 static
s_IsTraceEnabled()70 bool s_IsTraceEnabled()
71 {
72     static bool trace_enabled = NCBI_PARAM_TYPE(TEST, TRACE)::GetDefault();
73     return trace_enabled;
74 }
75 
76 
// Post "msg" through LOG_POST only when s_IsTraceEnabled() returns true;
// otherwise the message expression is not evaluated at all.
// The inverted "if ( ... ); else" form already owns its "else", so using the
// macro inside a caller's unbraced if/else cannot capture the caller's "else".
#define TEST_TRACE_POST(msg) if ( !s_IsTraceEnabled() ); else LOG_POST(msg)
78 
79 
// Random generator shared by the whole file: s_RandomShuffle() draws indexes
// from it, and NCBITEST_INIT_TREE() randomizes it when its seed is 0.
static CRandom s_Random;
81 
82 
83 template<class I>
s_RandomShuffle(I iter1,I iter2)84 void s_RandomShuffle(I iter1, I iter2)
85 {
86     for ( int s = int(distance(iter1, iter2)); s > 1; --s, ++iter1 ) {
87         swap(*iter1, *next(iter1, s_Random.GetRandIndex(s)));
88     }
89 }
90 
91 
s_InitScope(bool reset_loader=true)92 static CRef<CScope> s_InitScope(bool reset_loader = true)
93 {
94     CRef<CObjectManager> om = CObjectManager::GetInstance();
95     if ( reset_loader ) {
96         CDataLoader* loader =
97             om->FindDataLoader(CGBDataLoader::GetLoaderNameFromArgs());
98         if ( loader ) {
99             BOOST_CHECK(om->RevokeDataLoader(*loader));
100         }
101     }
102 #ifdef HAVE_PUBSEQ_OS
103     DBAPI_RegisterDriver_FTDS();
104     GenBankReaders_Register_Pubseq();
105     GenBankReaders_Register_Pubseq2();
106 #endif
107     CGBDataLoader::RegisterInObjectManager(*om);
108     CRef<CScope> scope(new CScope(*om));
109     scope->AddDefaults();
110     return scope;
111 }
112 
113 
114 template<class E>
s_AsString(const vector<E> & ids)115 string s_AsString(const vector<E>& ids)
116 {
117     CNcbiOstrstream out;
118     out << '{';
119     for ( auto& e : ids ) {
120         out << ' ' << e;
121     }
122     out << " }";
123     return CNcbiOstrstreamToString(out);
124 }
125 
126 
127 static
s_GetGBReader()128 const char* s_GetGBReader()
129 {
130     const char* env = getenv("GENBANK_LOADER_METHOD_BASE");
131     if ( !env ) {
132         env = getenv("GENBANK_LOADER_METHOD");
133     }
134     if ( !env ) {
135         // assume default ID2
136         TEST_TRACE_POST("Assuming default reader ID2");
137         env = "ID2";
138     }
139     return env;
140 }
141 
142 
s_CalcHaveID2(void)143 bool s_CalcHaveID2(void)
144 {
145     const char* env = s_GetGBReader();
146     if ( NStr::EndsWith(env, "id1", NStr::eNocase) ||
147          NStr::EndsWith(env, "pubseqos", NStr::eNocase) ) {
148         // non-ID2 based readers
149         TEST_TRACE_POST("No ID2, env=\""<<env<<"\"");
150         return false;
151     }
152     else {
153         TEST_TRACE_POST("ID2, env=\""<<env<<"\"");
154         return true;
155     }
156 }
157 
158 
s_CalcHaveID1(void)159 bool s_CalcHaveID1(void)
160 {
161     const char* env = s_GetGBReader();
162     if ( NStr::EndsWith(env, "id1", NStr::eNocase) ) {
163         TEST_TRACE_POST("ID1, env=\""<<env<<"\"");
164         return true;
165     }
166     else {
167         TEST_TRACE_POST("No ID1, env=\""<<env<<"\"");
168         return false;
169     }
170 }
171 
172 
s_HaveID2(void)173 bool s_HaveID2(void)
174 {
175     static bool ret = s_CalcHaveID2();
176     return ret;
177 }
178 
179 
s_HaveID1(void)180 bool s_HaveID1(void)
181 {
182     static bool ret = s_CalcHaveID1();
183     return ret;
184 }
185 
186 
// Flag combinations passed to the CScope getters in the tests below.
// Both include fForceLoad; they differ in which "missing" condition is
// requested to be reported by an exception:
//   kThrowNoData - fThrowOnMissingData
//   kThrowNoSeq  - fThrowOnMissingSequence
static const CScope::TGetFlags kThrowNoData =
           CScope::fForceLoad | CScope::fThrowOnMissingData;
static const CScope::TGetFlags kThrowNoSeq =
           CScope::fForceLoad | CScope::fThrowOnMissingSequence;
191 
// The order containers below drive a test through a schedule of operations.
// For every chosen order of operations:
//   get a fresh scope -> run the whole operation sequence 2 times ->
//   visit every operation in that order -> repeat each operation 2 times

// all_orders<>    - iterate over all possible orders of operations
// random_orders<> - iterate over several random orders of operations
198 
199 struct scope_operation
200 {
201     int op;
202     bool last;
203     CScope* scope;
204 
scope_operationscope_operation205     scope_operation(int op, bool last, const CRef<CScope>& scope)
206         : op(op), last(last), scope(scope.GetNCPointerOrNull())
207         {
208         }
209 
operator ->scope_operation210     CScope* operator->() const { return scope; }
211 };
212 
213 template<int N_OPS>
214 struct all_orders {
215     enum AtEnd {
216         at_end
217     };
218     static const int kCount0 = 1;
219     static const int kCount1 = 2;
220     static const int kCount2 = 2;
221 
222     typedef scope_operation value_type;
223 
224     struct const_iterator {
const_iteratorall_orders::const_iterator225         const_iterator()
226             : t0(0), t1(0), t2(0), i(0)
227 
228             {
229                 iota(std::begin(ops),
230                      std::end(ops),
231                      0);
232                 start_scope();
233             }
234         explicit
const_iteratorall_orders::const_iterator235         const_iterator(AtEnd)
236             : t0(kCount0), t1(0), t2(0), i(0)
237             {
238             }
239 
start_scopeall_orders::const_iterator240         void start_scope()
241             {
242                 TEST_TRACE_POST("Start");
243                 scope = s_InitScope();
244             }
245 
operator ==all_orders::const_iterator246         bool operator==(const_iterator& b) const
247             {
248                 return (t0 == b.t0 &&
249                         t1 == b.t1 &&
250                         t2 == b.t2 &&
251                         i == b.i);
252             }
operator !=all_orders::const_iterator253         bool operator!=(const_iterator& b) const
254             {
255                 return !(*this == b);
256             }
257 
operator *all_orders::const_iterator258         value_type operator*() const
259             {
260                 return value_type(ops[i], i == N_OPS-1, scope);
261             }
262 
operator ++all_orders::const_iterator263         const_iterator& operator++()
264             {
265                 if ( ++t2 < kCount2 ) {
266                     return *this;
267                 }
268                 t2 = 0;
269                 if ( ++i < N_OPS ) {
270                     return *this;
271                 }
272                 i = 0;
273                 if ( ++t1 < kCount1 ) {
274                     return *this;
275                 }
276                 t1 = 0;
277                 scope = null;
278                 if ( next_permutation(std::begin(ops),
279                                       std::end(ops)) ) {
280                     start_scope();
281                     return *this;
282                 }
283                 if ( ++t0 < kCount0 ) {
284                     start_scope();
285                     return *this;
286                 }
287                 return *this;
288             }
289 
290     private:
291         int t0, t1, t2, i;
292         int ops[N_OPS];
293         CRef<CScope> scope;
294     };
295 
beginall_orders296     const_iterator begin() const
297         {
298             return const_iterator();
299         }
endall_orders300     const_iterator end() const
301         {
302             return const_iterator(at_end);
303         }
304 };
305 
306 
307 template<int N_OPS>
308 struct random_orders {
309     enum AtEnd {
310         at_end
311     };
312     static const int kCount1 = 2;
313     static const int kCount2 = 2;
314 
315     typedef scope_operation value_type;
316 
317     struct const_iterator {
318         explicit
const_iteratorrandom_orders::const_iterator319         const_iterator(int count0)
320             : t0(0), count0(count0), t1(0), t2(0), i(0)
321 
322             {
323                 iota(std::begin(ops),
324                      std::end(ops),
325                      0);
326                 start_scope();
327             }
const_iteratorrandom_orders::const_iterator328         const_iterator(AtEnd, int count0)
329             : t0(count0), count0(count0), t1(0), t2(0), i(0)
330             {
331             }
332 
start_scoperandom_orders::const_iterator333         void start_scope()
334             {
335                 TEST_TRACE_POST("Start");
336                 s_RandomShuffle (std::begin(ops), std::end(ops));
337                 scope = s_InitScope();
338             }
339 
operator ==random_orders::const_iterator340         bool operator==(const_iterator& b) const
341             {
342                 return (t0 == b.t0 &&
343                         t1 == b.t1 &&
344                         t2 == b.t2 &&
345                         i == b.i);
346             }
operator !=random_orders::const_iterator347         bool operator!=(const_iterator& b) const
348             {
349                 return !(*this == b);
350             }
351 
operator *random_orders::const_iterator352         value_type operator*() const
353             {
354                 return value_type(ops[i], i == N_OPS-1, scope);
355             }
356 
operator ++random_orders::const_iterator357         const_iterator& operator++()
358             {
359                 if ( ++t2 < kCount2 ) {
360                     return *this;
361                 }
362                 t2 = 0;
363                 if ( ++i < N_OPS ) {
364                     return *this;
365                 }
366                 i = 0;
367                 if ( ++t1 < kCount1 ) {
368                     return *this;
369                 }
370                 t1 = 0;
371                 scope = null;
372                 if ( ++t0 < count0 ) {
373                     start_scope();
374                 }
375                 return *this;
376             }
377 
378     private:
379         int t0, count0, t1, t2, i;
380         int ops[N_OPS];
381         CRef<CScope> scope;
382     };
383 
beginrandom_orders384     const_iterator begin() const
385         {
386             return const_iterator(count0);
387         }
endrandom_orders388     const_iterator end() const
389         {
390             return const_iterator(at_end, count0);
391         }
392 
393     explicit
random_ordersrandom_orders394     random_orders(int count0)
395         : count0(count0)
396         {
397         }
398 
399 private:
400     int count0;
401 };
402 
403 
BOOST_AUTO_TEST_CASE(CheckNoSeqGi)
{
    // no sequence, check GI loading methods
    // should work with all readers
    //
    // Expected for the id below: the kThrowNoData calls return ZERO_GI, the
    // kThrowNoSeq calls throw CObjMgrException, and GetIds() is empty.
    // The four calls are tried in 10 random orders.
    CSeq_id_Handle id = CSeq_id_Handle::GetGiHandle(GI_CONST(1));
    vector<CSeq_id_Handle> idvec(1, id);
    LOG_POST("CheckNoSeqGi: "<<id);
    for ( auto op : random_orders<4>(10) ) {
        const int which = op.op;
        if ( which == 0 ) {
            TEST_TRACE_POST("GetGi");
            BOOST_CHECK(op->GetGi(id, kThrowNoData) == ZERO_GI);
        }
        else if ( which == 1 ) {
            TEST_TRACE_POST("GetGiThrow");
            BOOST_CHECK_THROW(op->GetGi(id, kThrowNoSeq), CObjMgrException);
        }
        else if ( which == 2 ) {
            TEST_TRACE_POST("GetGiBulk");
            BOOST_CHECK(op->GetGis(idvec, kThrowNoData)[0] == ZERO_GI);
        }
        else if ( which == 3 ) {
            TEST_TRACE_POST("GetGiBulkThrow");
            BOOST_CHECK_THROW(op->GetGis(idvec, kThrowNoSeq), CObjMgrException);
        }
        if ( !op.last ) {
            continue;
        }
        // extra check at the last position of the operation sequence
        TEST_TRACE_POST("GetIds");
        BOOST_CHECK(op->GetIds(id).empty());
    }
}
436 
437 
BOOST_AUTO_TEST_CASE(CheckNoSeqAcc)
{
    // no sequence, check acc loading methods
    // should work with all readers
    //
    // Expected for the id below: the kThrowNoData calls return an empty
    // accession, the kThrowNoSeq calls throw CObjMgrException, and GetIds()
    // is empty.  The four calls are tried in 10 random orders.
    CSeq_id_Handle id = CSeq_id_Handle::GetGiHandle(GI_CONST(1));
    vector<CSeq_id_Handle> idvec(1, id);
    LOG_POST("CheckNoSeqAcc: "<<id);
    for ( auto op : random_orders<4>(10) ) {
        const int which = op.op;
        if ( which == 0 ) {
            TEST_TRACE_POST("GetAccVer");
            BOOST_CHECK(!op->GetAccVer(id, kThrowNoData));
        }
        else if ( which == 1 ) {
            TEST_TRACE_POST("GetAccVerThrow");
            BOOST_CHECK_THROW(op->GetAccVer(id, kThrowNoSeq), CObjMgrException);
        }
        else if ( which == 2 ) {
            TEST_TRACE_POST("GetAccVerBulk");
            BOOST_CHECK(!op->GetAccVers(idvec, kThrowNoData)[0]);
        }
        else if ( which == 3 ) {
            TEST_TRACE_POST("GetAccVerBulkThrow");
            BOOST_CHECK_THROW(op->GetAccVers(idvec, kThrowNoSeq), CObjMgrException);
        }
        if ( !op.last ) {
            continue;
        }
        // extra check at the last position of the operation sequence
        TEST_TRACE_POST("GetIds");
        BOOST_CHECK(op->GetIds(id).empty());
    }
}
470 
471 
BOOST_AUTO_TEST_CASE(CheckNoSeqAll)
{
    // no sequence, check all loading methods
    // should work with all readers
    //
    // Eight different calls (accession, GI, ids, bioseq handle) are tried in
    // 10 random orders; each of them must report the absence of the sequence
    // in its own way (empty result, ZERO_GI or CObjMgrException).
    CSeq_id_Handle id = CSeq_id_Handle::GetGiHandle(GI_CONST(1));
    vector<CSeq_id_Handle> idvec(1, id);
    LOG_POST("CheckNoSeq: "<<id);
    for ( auto op : random_orders<8>(10) ) {
        const int which = op.op;
        if ( which == 0 ) {
            TEST_TRACE_POST("GetAccVer");
            BOOST_CHECK(!op->GetAccVer(id));
        }
        else if ( which == 1 ) {
            TEST_TRACE_POST("GetAccVerBulk");
            BOOST_CHECK(!op->GetAccVers(idvec, kThrowNoData)[0]);
        }
        else if ( which == 2 ) {
            TEST_TRACE_POST("GetAccVerBulkThrow");
            BOOST_CHECK_THROW(op->GetAccVers(idvec, kThrowNoSeq), CObjMgrException);
        }
        else if ( which == 3 ) {
            TEST_TRACE_POST("GetGi");
            BOOST_CHECK(op->GetGi(id) == ZERO_GI);
        }
        else if ( which == 4 ) {
            TEST_TRACE_POST("GetGiBulk");
            BOOST_CHECK(op->GetGis(idvec, kThrowNoData)[0] == ZERO_GI);
        }
        else if ( which == 5 ) {
            TEST_TRACE_POST("GetGiBulkThrow");
            BOOST_CHECK_THROW(op->GetGis(idvec, kThrowNoSeq), CObjMgrException);
        }
        else if ( which == 6 ) {
            TEST_TRACE_POST("GetIds");
            BOOST_CHECK(op->GetIds(id).empty());
        }
        else if ( which == 7 ) {
            TEST_TRACE_POST("GetBioseqHandle");
            BOOST_CHECK(!op->GetBioseqHandle(id));
        }
    }
}
516 
517 
BOOST_AUTO_TEST_CASE(CheckNoAcc)
{
    // have GI, have no accession
    // should work with all readers
    //
    // Expected for the id below: the sequence exists, so the kThrowNoSeq
    // calls return an empty accession without throwing, while the
    // kThrowNoData calls throw CObjMgrException because the accession is
    // missing.  The four calls are tried in 10 random orders.
    CSeq_id_Handle id = CSeq_id_Handle::GetGiHandle(GI_CONST(156205));
    vector<CSeq_id_Handle> idvec(1, id);
    LOG_POST("CheckNoAcc: "<<id);
    for ( auto op : random_orders<4>(10) ) {
        switch ( op.op ) {
        case 0:
            TEST_TRACE_POST("GetAccVer");
            BOOST_CHECK(!op->GetAccVer(id, kThrowNoSeq));
            break;
        case 1:
            // throwing variant: traced under its own label so that it can be
            // told apart from case 0 in the trace output
            TEST_TRACE_POST("GetAccVerThrow");
            BOOST_CHECK_THROW(op->GetAccVer(id, kThrowNoData), CObjMgrException);
            break;
        case 2:
            TEST_TRACE_POST("GetAccVerBulk");
            BOOST_CHECK(!op->GetAccVers(idvec, kThrowNoSeq)[0]);
            break;
        case 3:
            TEST_TRACE_POST("GetAccVerBulkThrow");
            BOOST_CHECK_THROW(op->GetAccVers(idvec, kThrowNoData), CObjMgrException);
            break;
        }
        if ( op.last ) {
            // at the last position of the sequence verify that the sequence
            // itself is still found: GI, ids and bioseq handle
            TEST_TRACE_POST("GetGi");
            BOOST_CHECK(op->GetGi(id, kThrowNoSeq) != ZERO_GI);
            TEST_TRACE_POST("GetIds");
            BOOST_CHECK(!op->GetIds(id).empty());
            TEST_TRACE_POST("GetBioseqHandle");
            BOOST_CHECK(op->GetBioseqHandle(id));
        }
    }
}
554 
555 
NCBITEST_INIT_TREE()556 NCBITEST_INIT_TREE()
557 {
558     if ( s_Random.GetSeed() == 0 ) {
559         s_Random.Randomize();
560         LOG_POST("Random seed: "<<s_Random.GetSeed());
561     }
562 }
563