1 /*===========================================================================
2 *
3 *                            PUBLIC DOMAIN NOTICE
4 *               National Center for Biotechnology Information
5 *
6 *  This software/database is a "United States Government Work" under the
7 *  terms of the United States Copyright Act.  It was written as part of
8 *  the author's official duties as a United States Government employee and
9 *  thus cannot be copyrighted.  This software/database is freely available
10 *  to the public for use. The National Library of Medicine and the U.S.
11 *  Government have not placed any restriction on its use or reproduction.
12 *
13 *  Although all reasonable efforts have been taken to ensure the accuracy
14 *  and reliability of the software and data, the NLM and the U.S.
15 *  Government do not and cannot warrant the performance or results that
16 *  may be obtained by using this software or data. The NLM and the U.S.
17 *  Government disclaim all warranties, express or implied, including
18 *  warranties of performance, merchantability or fitness for any particular
19 *  purpose.
20 *
21 *  Please cite the author in any work or product based on this material.
22 *
23 * ===========================================================================
24 *
25 */
26 
27 /**
28 * Unit tests for the pileup-estimator
29 */
30 
31 #include <limits>	/* for std::numeric_limits<uint64_t>::max() in test #14 */
32 
33 #include <ktst/unit_test.hpp>
34 
35 #include <stdexcept>
36 #include <string>
37 
38 #include <klib/rc.h>
39 #include <klib/text.h>
40 
41 #include <vdb/manager.h>
42 #include <vdb/database.h>
43 #include <vdb/table.h>
44 #include <vdb/cursor.h>
45 
46 #include <align/reference.h>
47 #include <align/manager.h>
48 #include <align/iterator.h>
49 
50 #include <align/unsupported_pileup_estimator.h>
51 
52 #include <ngs/ncbi/NGS.hpp> // openReadCollection
53 #include <ngs/ErrorMsg.hpp>
54 #include <ngs/ReadCollection.hpp>
55 #include <ngs/Reference.hpp>
56 #include <ngs/Alignment.hpp>
57 #include <ngs/PileupIterator.hpp>
58 
59 using namespace std;
60 
calc_coverage_sum_using_ref_iter(const char * acc,const char * refname,uint64_t slice_start,uint32_t slice_len,uint64_t * result)61 static rc_t calc_coverage_sum_using_ref_iter( const char * acc, const char * refname,
62                                                uint64_t slice_start, uint32_t slice_len, uint64_t * result )
63 {
64     const VDBManager *mgr;
65     rc_t rc = VDBManagerMakeRead( &mgr, NULL );
66     if ( rc == 0 )
67     {
68         const VDatabase *db;
69         rc = VDBManagerOpenDBRead( mgr, &db, NULL, "%s", acc );
70         if ( rc == 0 )
71         {
72             const VTable * tbl;
73             rc = VDatabaseOpenTableRead( db, &tbl, "%s", "PRIMARY_ALIGNMENT" );
74             if ( rc == 0 )
75             {
76                 const VCursor * prim_curs;
77                 rc = VTableCreateCursorRead( tbl, &prim_curs );
78                 if ( rc == 0 )
79                 {
80                     const ReferenceList *ref_list;
81                     uint32_t reflist_options =  ereferencelist_usePrimaryIds;
82                     rc = ReferenceList_MakeDatabase( &ref_list, db, reflist_options, 0, NULL, 0 ); /* align/reference.h */
83                     if ( rc == 0 )
84                     {
85                         const ReferenceObj * ref_obj;
86                         rc = ReferenceList_Find( ref_list, &ref_obj, refname, string_size( refname ) ); /* align/reference.h */
87                         {
88                             const AlignMgr * a_mgr;
89                             rc = AlignMgrMakeRead( &a_mgr );   /* align/manager.h */
90                             if ( rc == 0 )
91                             {
92                                 ReferenceIterator * ref_iter;
93                                 rc = AlignMgrMakeReferenceIterator( a_mgr, &ref_iter, NULL, 0 );
94                                 if ( rc == 0 )
95                                 {
96                                     rc = ReferenceIteratorAddPlacements( ref_iter,       /* the outer ref-iter */
97                                                                          ref_obj,        /* the ref-obj for this chromosome */
98                                                                          slice_start,    /* start ( zero-based ) */
99                                                                          slice_len,      /* length */
100                                                                          NULL,          /* ref-cursor */
101                                                                          prim_curs,     /* align-cursor */
102                                                                          primary_align_ids,    /* which id's */
103                                                                          NULL,         /* what read-group */
104                                                                          NULL );       /* placement-context */
105                                     while ( rc == 0 )
106                                     {
107                                         rc = ReferenceIteratorNextReference( ref_iter, NULL, NULL, NULL );
108                                         rc_t rc1 = 0;
109                                         while ( rc == 0 && rc1 == 0 )
110                                         {
111                                             INSDC_coord_zero first_pos;
112                                             INSDC_coord_len len;
113                                             rc1 = ReferenceIteratorNextWindow( ref_iter, &first_pos, &len );
114                                             rc_t rc2 = 0;
115                                             while ( rc1 == 0 && rc2 == 0 )
116                                             {
117                                                 rc2 = ReferenceIteratorNextPos( ref_iter, true ); /* do skip empty positions */
118                                                 if ( rc2 == 0 )
119                                                 {
120                                                     uint32_t depth;
121                                                     rc2 = ReferenceIteratorPosition( ref_iter, NULL, &depth, NULL );
122                                                     if ( rc2 == 0 )
123                                                         *result += depth;
124                                                 }
125                                             }
126                                         }
127                                     }
128                                     if ( GetRCState( rc ) == rcDone ) rc = 0;
129                                     ReferenceIteratorRelease( ref_iter );
130                                 }
131                                 AlignMgrRelease( a_mgr );
132                             }
133                             ReferenceObj_Release( ref_obj );
134                         }
135                         ReferenceList_Release( ref_list );
136                     }
137                     VCursorRelease( prim_curs );
138                 }
139                 VTableRelease ( tbl );
140             }
141             VDatabaseRelease( db );
142         }
143         VDBManagerRelease( mgr );
144     }
145     return rc;
146 }
147 
calc_coverage_using_ref_iter(const char * acc,const char * refname,uint64_t slice_start,uint32_t slice_len,uint32_t * coverage)148 static rc_t calc_coverage_using_ref_iter( const char * acc, const char * refname,
149                                           uint64_t slice_start, uint32_t slice_len, uint32_t * coverage )
150 {
151     const VDBManager *mgr;
152     rc_t rc = VDBManagerMakeRead( &mgr, NULL );
153     if ( rc == 0 )
154     {
155         const VDatabase *db;
156         rc = VDBManagerOpenDBRead( mgr, &db, NULL, "%s", acc );
157         if ( rc == 0 )
158         {
159             const VTable * tbl;
160             rc = VDatabaseOpenTableRead( db, &tbl, "%s", "PRIMARY_ALIGNMENT" );
161             if ( rc == 0 )
162             {
163                 const VCursor * prim_curs;
164                 rc = VTableCreateCursorRead( tbl, &prim_curs );
165                 if ( rc == 0 )
166                 {
167                     const ReferenceList *ref_list;
168                     uint32_t reflist_options =  ereferencelist_usePrimaryIds;
169                     rc = ReferenceList_MakeDatabase( &ref_list, db, reflist_options, 0, NULL, 0 ); /* align/reference.h */
170                     if ( rc == 0 )
171                     {
172                         const ReferenceObj * ref_obj;
173                         rc = ReferenceList_Find( ref_list, &ref_obj, refname, string_size( refname ) ); /* align/reference.h */
174                         {
175                             const AlignMgr * a_mgr;
176                             rc = AlignMgrMakeRead( &a_mgr );   /* align/manager.h */
177                             if ( rc == 0 )
178                             {
179                                 ReferenceIterator * ref_iter;
180                                 rc = AlignMgrMakeReferenceIterator( a_mgr, &ref_iter, NULL, 0 );
181                                 if ( rc == 0 )
182                                 {
183                                     memset( coverage, 0, slice_len * ( sizeof *coverage ) );
184                                     rc = ReferenceIteratorAddPlacements( ref_iter,       /* the outer ref-iter */
185                                                                          ref_obj,        /* the ref-obj for this chromosome */
186                                                                          slice_start,    /* start ( zero-based ) */
187                                                                          slice_len,      /* length */
188                                                                          NULL,          /* ref-cursor */
189                                                                          prim_curs,     /* align-cursor */
190                                                                          primary_align_ids,    /* which id's */
191                                                                          NULL,         /* what read-group */
192                                                                          NULL );       /* placement-context */
193                                     while ( rc == 0 )
194                                     {
195                                         rc = ReferenceIteratorNextReference( ref_iter, NULL, NULL, NULL );
196                                         rc_t rc1 = 0;
197                                         while ( rc == 0 && rc1 == 0 )
198                                         {
199                                             INSDC_coord_zero first_pos;
200                                             INSDC_coord_len len;
201                                             rc1 = ReferenceIteratorNextWindow( ref_iter, &first_pos, &len );
202                                             rc_t rc2 = 0;
203                                             while ( rc1 == 0 && rc2 == 0 )
204                                             {
205                                                 rc2 = ReferenceIteratorNextPos( ref_iter, true ); /* do skip empty positions */
206                                                 if ( rc2 == 0 )
207                                                 {
208                                                     uint32_t depth;
209                                                     INSDC_coord_zero this_pos;
210                                                     rc2 = ReferenceIteratorPosition( ref_iter, &this_pos, &depth, NULL );
211                                                     if ( rc == 0 )
212                                                     {
213                                                         int64_t ofs = ( this_pos - slice_start );
214                                                         coverage[ ofs ] = depth;
215                                                     }
216                                                 }
217                                             }
218                                         }
219                                     }
220                                     if ( GetRCState( rc ) == rcDone ) rc = 0;
221                                     ReferenceIteratorRelease( ref_iter );
222                                 }
223                                 AlignMgrRelease( a_mgr );
224                             }
225                             ReferenceObj_Release( ref_obj );
226                         }
227                         ReferenceList_Release( ref_list );
228                     }
229                     VCursorRelease( prim_curs );
230                 }
231                 VTableRelease ( tbl );
232             }
233             VDatabaseRelease( db );
234         }
235         VDBManagerRelease( mgr );
236     }
237     return rc;
238 }
239 
240 
calc_coverage_using_ngs(ngs::String acc,ngs::String refname,uint64_t slice_start,uint32_t slice_len,uint32_t * coverage)241 static void calc_coverage_using_ngs( ngs::String acc, ngs::String refname,
242                                      uint64_t slice_start, uint32_t slice_len, uint32_t * coverage )
243 {
244     try
245     {
246         ngs::ReadCollection run( ncbi::NGS::openReadCollection( acc ) );
247         ngs::Reference ref = run.getReference ( refname );
248         ngs::PileupIterator it = ref.getPileupSlice( slice_start, slice_len );
249         while ( it.nextPileup() )
250         {
251             uint32_t depth( it.getPileupDepth() );
252             if ( depth > 0 )
253             {
254                 uint64_t pos( it.getReferencePosition() );
255                 int64_t ofs = ( pos - slice_start );
256                 if ( ofs >= 0 && ofs < slice_len )
257                     coverage[ ofs ] = depth;
258             }
259         }
260     }
261 
262     catch ( ngs::ErrorMsg & e ) {
263         cerr << "Error: " << e . toString () << endl;
264     }
265     catch ( std::exception & e ) {
266         cerr << "Error: " << e . what () << endl;
267     }
268     catch ( ... ) {
269         cerr << "Error: unknown exception" << endl;
270     }
271 }
272 
273 const char * ACC1 = "SRR341578";
274 const char * ACC1_REF = "NC_011748.1";
275 const uint64_t slice1_start = 3000002;
276 const uint32_t slice1_len   = 200;
277 
278 
279 TEST_SUITE( PileupEstimatorTestSuite );
280 
TEST_CASE(Estimator_1)281 TEST_CASE ( Estimator_1 )
282 {
283     std::cout << "Estimator-Test #1 ( estimator vs. ref_iter ) " << std::endl;
284 
285     uint64_t res1 = 0;
286 
287     struct PileupEstimator * estim;
288     rc_t rc = MakePileupEstimator( &estim, ACC1, 0, NULL, NULL, 0, false );
289     REQUIRE_RC( rc );
290     if ( rc == 0 )
291     {
292         String rname;
293         StringInitCString( &rname, ACC1_REF );
294 
295         rc = RunPileupEstimator( estim, &rname, slice1_start, slice1_len, &res1 );
296         REQUIRE_RC( rc );
297 
298         //std::cout << "result ( using RunPileupEstimator ) : " << res1 << std::endl;
299 
300         rc = ReleasePileupEstimator( estim );
301         REQUIRE_RC( rc );
302     }
303 
304     uint64_t res2 = 0;
305 
306     rc = calc_coverage_sum_using_ref_iter( ACC1, ACC1_REF, slice1_start, slice1_len, &res2 );
307     REQUIRE_RC( rc );
308 
309     //std::cout << "result ( using ReferenceIterator ) : " << res2 << std::endl;
310 
311     REQUIRE_EQUAL( res1, res2 );
312 }
313 
TEST_CASE(Estimator_2)314 TEST_CASE ( Estimator_2 )
315 {
316     std::cout << "Estimator-Test #2 ( no sources and no cursors )" << std::endl;
317 
318     // MakePileupEstimator has to fail when source and the cursors are NULL
319     struct PileupEstimator * estim;
320     rc_t rc = MakePileupEstimator( &estim, NULL, 0, NULL, NULL, 0, false );
321     REQUIRE_RC_FAIL( rc );
322 }
323 
TEST_CASE(Estimator_3)324 TEST_CASE ( Estimator_3 )
325 {
326     std::cout << "Estimator-Test #3 ( no self-ptr )" << std::endl;
327 
328     // MakePileupEstimator has to fail when given a NULL-ptr as *self
329     rc_t rc = MakePileupEstimator( NULL, ACC1, 0, NULL, NULL, 0, false );
330     REQUIRE_RC_FAIL( rc );
331 }
332 
make_cursor(const VDatabase * db,const VCursor ** curs,const char * tbl_name,uint32_t count,...)333 static rc_t make_cursor( const VDatabase *db, const VCursor ** curs, const char * tbl_name, uint32_t count, ... )
334 {
335     const VTable * tbl;
336     rc_t rc = VDatabaseOpenTableRead( db, &tbl, "%s", tbl_name );
337     if ( rc == 0 )
338     {
339         rc = VTableCreateCursorRead( tbl, curs );
340         if ( rc == 0 )
341         {
342             uint32_t i;
343             va_list args;
344             va_start( args, count );
345             for ( i = 0; rc == 0 && i < count; i++ )
346             {
347                 const char * colname = va_arg( args, const char * );
348                 uint32_t idx;
349                 rc = VCursorAddColumn( *curs, &idx, colname );
350             }
351             va_end( args );
352             if ( rc == 0 )
353                 rc = VCursorOpen( *curs );
354         }
355         VTableRelease( tbl );
356     }
357     return rc;
358 }
359 
360 
TEST_CASE(Estimator_4)361 TEST_CASE ( Estimator_4 )
362 {
363     std::cout << "Estimator-Test #4 ( with 2 cursors provided )" << std::endl;
364     const VDBManager *mgr;
365     rc_t rc = VDBManagerMakeRead( &mgr, NULL );
366     REQUIRE_RC( rc );
367     if ( rc == 0 )
368     {
369         const VDatabase *db;
370         rc = VDBManagerOpenDBRead( mgr, &db, NULL, "%s", ACC1 );
371         REQUIRE_RC( rc );
372         if ( rc == 0 )
373         {
374             const VCursor * prim_curs;
375             rc = make_cursor( db, &prim_curs, "PRIMARY_ALIGNMENT", 2, "REF_POS", "REF_LEN" );
376             REQUIRE_RC( rc );
377             if ( rc == 0 )
378             {
379                 const VCursor * ref_curs;
380                 rc = make_cursor( db, &ref_curs, "REFERENCE", 4, "SEQ_ID", "SEQ_LEN", "MAX_SEQ_LEN", "PRIMARY_ALIGNMENT_IDS" );
381                 REQUIRE_RC( rc );
382                 if ( rc == 0 )
383                 {
384                     struct PileupEstimator * estim;
385                     rc_t rc = MakePileupEstimator( &estim, NULL, 0, ref_curs, prim_curs, 0, false );
386                     REQUIRE_RC( rc );
387                     if ( rc == 0 )
388                     {
389                         uint64_t res = 0;
390 
391                         String rname;
392                         StringInitCString( &rname, ACC1_REF );
393 
394                         rc = RunPileupEstimator( estim, &rname, slice1_start, slice1_len, &res );
395                         REQUIRE_RC( rc );
396 
397                         //std::cout << "result: " << res << std::endl;
398 
399                         rc = ReleasePileupEstimator( estim );
400                         REQUIRE_RC( rc );
401                     }
402                     VCursorRelease( ref_curs );
403                 }
404                 VCursorRelease( prim_curs );
405             }
406             VDatabaseRelease( db );
407         }
408         VDBManagerRelease( mgr );
409     }
410 }
411 
412 
TEST_CASE(Estimator_5)413 TEST_CASE ( Estimator_5 )
414 {
415     std::cout << "Estimator-Test #5 ( with 2 cursors provided, one column on alignments missing )" << std::endl;
416     const VDBManager *mgr;
417     rc_t rc = VDBManagerMakeRead( &mgr, NULL );
418     REQUIRE_RC( rc );
419     if ( rc == 0 )
420     {
421         const VDatabase *db;
422         rc = VDBManagerOpenDBRead( mgr, &db, NULL, "%s", ACC1 );
423         REQUIRE_RC( rc );
424         if ( rc == 0 )
425         {
426             const VCursor * prim_curs;
427             rc = make_cursor( db, &prim_curs, "PRIMARY_ALIGNMENT", 1, "REF_POS" );
428             REQUIRE_RC( rc );
429             if ( rc == 0 )
430             {
431                 const VCursor * ref_curs;
432                 rc = make_cursor( db, &ref_curs, "REFERENCE", 4, "SEQ_ID", "SEQ_LEN", "MAX_SEQ_LEN", "PRIMARY_ALIGNMENT_IDS" );
433                 REQUIRE_RC( rc );
434                 if ( rc == 0 )
435                 {
436                     struct PileupEstimator * estim;
437                     rc_t rc = MakePileupEstimator( &estim, NULL, 0, ref_curs, prim_curs, 0, false );
438                     REQUIRE_RC_FAIL( rc );
439                     if ( rc == 0 )
440                         ReleasePileupEstimator( estim );
441                     VCursorRelease( ref_curs );
442                 }
443                 VCursorRelease( prim_curs );
444             }
445             VDatabaseRelease( db );
446         }
447         VDBManagerRelease( mgr );
448     }
449 }
450 
TEST_CASE(Estimator_6)451 TEST_CASE ( Estimator_6 )
452 {
453     std::cout << "Estimator-Test #6 ( with 2 cursors provided, one column on ref missing )" << std::endl;
454     const VDBManager *mgr;
455     rc_t rc = VDBManagerMakeRead( &mgr, NULL );
456     REQUIRE_RC( rc );
457     if ( rc == 0 )
458     {
459         const VDatabase *db;
460         rc = VDBManagerOpenDBRead( mgr, &db, NULL, "%s", ACC1 );
461         REQUIRE_RC( rc );
462         if ( rc == 0 )
463         {
464             const VCursor * prim_curs;
465             rc = make_cursor( db, &prim_curs, "PRIMARY_ALIGNMENT", 2, "REF_POS", "REF_LEN" );
466             REQUIRE_RC( rc );
467             if ( rc == 0 )
468             {
469                 const VCursor * ref_curs;
470                 rc = make_cursor( db, &ref_curs, "REFERENCE", 3, "SEQ_ID", "SEQ_LEN", "MAX_SEQ_LEN" );
471                 REQUIRE_RC( rc );
472                 if ( rc == 0 )
473                 {
474                     struct PileupEstimator * estim;
475                     rc_t rc = MakePileupEstimator( &estim, NULL, 0, ref_curs, prim_curs, 0, false );
476                     REQUIRE_RC_FAIL( rc );
477                     if ( rc == 0 )
478                         ReleasePileupEstimator( estim );
479                     VCursorRelease( ref_curs );
480                 }
481                 VCursorRelease( prim_curs );
482             }
483             VDatabaseRelease( db );
484         }
485         VDBManagerRelease( mgr );
486     }
487 }
488 
TEST_CASE(Estimator_7)489 TEST_CASE ( Estimator_7 )
490 {
491     std::cout << "Estimator-Test #7 ( with ref_cursor, no prim_cursor provided )" << std::endl;
492     const VDBManager *mgr;
493     rc_t rc = VDBManagerMakeRead( &mgr, NULL );
494     REQUIRE_RC( rc );
495     if ( rc == 0 )
496     {
497         const VDatabase *db;
498         rc = VDBManagerOpenDBRead( mgr, &db, NULL, "%s", ACC1 );
499         REQUIRE_RC( rc );
500         if ( rc == 0 )
501         {
502             const VCursor * ref_curs;
503             rc = make_cursor( db, &ref_curs, "REFERENCE", 4, "SEQ_ID", "SEQ_LEN", "MAX_SEQ_LEN", "PRIMARY_ALIGNMENT_IDS" );
504             REQUIRE_RC( rc );
505             if ( rc == 0 )
506             {
507                 struct PileupEstimator * estim;
508                 rc_t rc = MakePileupEstimator( &estim, ACC1, 0, ref_curs, NULL, 0, false );
509                 REQUIRE_RC( rc );
510                 if ( rc == 0 )
511                 {
512                     uint64_t res = 0;
513 
514                     String rname;
515                     StringInitCString( &rname, ACC1_REF );
516 
517                     rc = RunPileupEstimator( estim, &rname, slice1_start, slice1_len, &res );
518                     REQUIRE_RC( rc );
519 
520                     //std::cout << "result: " << res << std::endl;
521 
522                     rc = ReleasePileupEstimator( estim );
523                     REQUIRE_RC( rc );
524                 }
525                 VCursorRelease( ref_curs );
526             }
527             VDatabaseRelease( db );
528         }
529         VDBManagerRelease( mgr );
530     }
531 }
532 
TEST_CASE(Estimator_8)533 TEST_CASE ( Estimator_8 )
534 {
535     std::cout << "Estimator-Test #8 ( with prim_cursor, no ref_cursor provided )" << std::endl;
536     const VDBManager *mgr;
537     rc_t rc = VDBManagerMakeRead( &mgr, NULL );
538     REQUIRE_RC( rc );
539     if ( rc == 0 )
540     {
541         const VDatabase *db;
542         rc = VDBManagerOpenDBRead( mgr, &db, NULL, "%s", ACC1 );
543         REQUIRE_RC( rc );
544         if ( rc == 0 )
545         {
546             const VCursor * prim_curs;
547             rc = make_cursor( db, &prim_curs, "PRIMARY_ALIGNMENT", 2, "REF_POS", "REF_LEN" );
548             REQUIRE_RC( rc );
549             if ( rc == 0 )
550             {
551                 struct PileupEstimator * estim;
552                 rc_t rc = MakePileupEstimator( &estim, ACC1, 0, NULL, prim_curs, 0, false );
553                 REQUIRE_RC( rc );
554                 if ( rc == 0 )
555                 {
556                     uint64_t res = 0;
557 
558                     String rname;
559                     StringInitCString( &rname, ACC1_REF );
560 
561                     rc = RunPileupEstimator( estim, &rname, slice1_start, slice1_len, &res );
562                     REQUIRE_RC( rc );
563 
564                     //std::cout << "result: " << res << std::endl;
565 
566                     rc = ReleasePileupEstimator( estim );
567                     REQUIRE_RC( rc );
568                 }
569                 VCursorRelease( prim_curs );
570             }
571             VDatabaseRelease( db );
572         }
573         VDBManagerRelease( mgr );
574     }
575 }
576 
TEST_CASE(Estimator_9)577 TEST_CASE ( Estimator_9 )
578 {
579     std::cout << "Estimator-Test #9 ( reference-name missing )" << std::endl;
580 
581     struct PileupEstimator * estim;
582     rc_t rc = MakePileupEstimator( &estim, ACC1, 0, NULL, NULL, 0, false );
583     REQUIRE_RC( rc );
584     if ( rc == 0 )
585     {
586         uint64_t res = 0;
587 
588         rc = RunPileupEstimator( estim, NULL, slice1_start, slice1_len, &res );
589         REQUIRE_RC_FAIL( rc );
590         if ( rc == 0 )
591             ReleasePileupEstimator( estim );
592     }
593 }
594 
TEST_CASE(Estimator_10)595 TEST_CASE ( Estimator_10 )
596 {
597     std::cout << "Estimator-Test #10 ( slice-length is zero )" << std::endl;
598 
599     struct PileupEstimator * estim;
600     rc_t rc = MakePileupEstimator( &estim, ACC1, 0, NULL, NULL, 0, false );
601     REQUIRE_RC( rc );
602     if ( rc == 0 )
603     {
604         uint64_t res = 0;
605 
606         String rname;
607         StringInitCString( &rname, ACC1_REF );
608 
609         rc = RunPileupEstimator( estim, &rname, slice1_start, 0, &res );
610         REQUIRE_RC_FAIL( rc );
611         if ( rc == 0 )
612             ReleasePileupEstimator( estim );
613     }
614 }
615 
TEST_CASE(Estimator_11)616 TEST_CASE ( Estimator_11 )
617 {
618     std::cout << "Estimator-Test #11 ( slice-start beyond end of reference )" << std::endl;
619 
620     struct PileupEstimator * estim;
621     rc_t rc = MakePileupEstimator( &estim, ACC1, 0, NULL, NULL, 0, false );
622     REQUIRE_RC( rc );
623     if ( rc == 0 )
624     {
625         uint64_t res = 0;
626 
627         String rname;
628         StringInitCString( &rname, ACC1_REF );
629 
630         rc = RunPileupEstimator( estim, &rname, slice1_start * 2, slice1_len, &res );
631         REQUIRE_RC_FAIL( rc );
632         if ( rc == 0 )
633             ReleasePileupEstimator( estim );
634     }
635 }
636 
TEST_CASE(Estimator_12)637 TEST_CASE ( Estimator_12 )
638 {
639     std::cout << "Estimator-Test #12 ( slice-length beyond end of reference )" << std::endl;
640 
641     struct PileupEstimator * estim;
642     rc_t rc = MakePileupEstimator( &estim, ACC1, 0, NULL, NULL, 0, false );
643     REQUIRE_RC( rc );
644     if ( rc == 0 )
645     {
646         uint64_t res = 0;
647 
648         String rname;
649         StringInitCString( &rname, ACC1_REF );
650 
651         rc = RunPileupEstimator( estim, &rname, slice1_start, slice1_len * 100000, &res );
652         REQUIRE_RC_FAIL( rc );
653         if ( rc == 0 )
654             ReleasePileupEstimator( estim );
655     }
656 }
657 
TEST_CASE(Estimator_13)658 TEST_CASE ( Estimator_13 )
659 {
660     std::cout << "Estimator-Test #13 ( result-ptr missing )" << std::endl;
661 
662     struct PileupEstimator * estim;
663     rc_t rc = MakePileupEstimator( &estim, ACC1, 0, NULL, NULL, 0, false );
664     REQUIRE_RC( rc );
665     if ( rc == 0 )
666     {
667         String rname;
668         StringInitCString( &rname, ACC1_REF );
669 
670         rc = RunPileupEstimator( estim, &rname, slice1_start, slice1_len, NULL );
671         REQUIRE_RC_FAIL( rc );
672         if ( rc == 0 )
673             ReleasePileupEstimator( estim );
674     }
675 }
676 
677 const char * ACC2 = "SRR5450996";
678 const char * ACC2_REF = "NC_000068.7";
679 const uint64_t slice2_start = 98662227;
680 const uint32_t slice2_len   = 1000;
681 
TEST_CASE(Estimator_14)682 TEST_CASE ( Estimator_14 )
683 {
684     std::cout << "Estimator-Test #14 ( cutoff-value on expensive accession ) " << std::endl;
685 
686     struct PileupEstimator * estim;
687     rc_t rc = MakePileupEstimator( &estim, ACC2, 0, NULL, NULL, 1000000, false );
688     REQUIRE_RC( rc );
689     if ( rc == 0 )
690     {
691 		uint64_t res = 0;
692 
693         String rname;
694         StringInitCString( &rname, ACC2_REF );
695 
696         rc = RunPileupEstimator( estim, &rname, slice2_start, slice2_len, &res );
697         REQUIRE_RC( rc );
698 
699         //std::cout << "result: " << res << std::endl;
700 
701 		REQUIRE_EQUAL( res, std::numeric_limits<uint64_t>::max() );
702 
703         rc = ReleasePileupEstimator( estim );
704         REQUIRE_RC( rc );
705     }
706 }
707 
TEST_CASE(Estimator_15)708 TEST_CASE ( Estimator_15 )
709 {
710     std::cout << "Estimator-Test #15 ( multiple invocations with different slices )" << std::endl;
711 
712     struct PileupEstimator * estim;
713     rc_t rc = MakePileupEstimator( &estim, ACC1, 0, NULL, NULL, 0, false );
714     REQUIRE_RC( rc );
715     if ( rc == 0 )
716     {
717 		uint64_t res = 0;
718 
719         String rname;
720         StringInitCString( &rname, ACC1_REF );
721 
722         rc = RunPileupEstimator( estim, &rname, slice1_start, slice1_len, &res );
723         REQUIRE_RC( rc );
724         //std::cout << "result: " << res << std::endl;
725 
726         rc = RunPileupEstimator( estim, &rname, slice1_start + 100, slice1_len * 10, &res );
727         REQUIRE_RC( rc );
728         //std::cout << "result: " << res << std::endl;
729 
730         rc = RunPileupEstimator( estim, &rname, slice1_start + 1000, slice1_len * 100, &res );
731         REQUIRE_RC( rc );
732         //std::cout << "result: " << res << std::endl;
733 
734         rc = RunPileupEstimator( estim, &rname, slice1_start + 6000, slice1_len, &res );
735         REQUIRE_RC( rc );
736         //std::cout << "result: " << res << std::endl;
737 
738         rc = ReleasePileupEstimator( estim );
739         REQUIRE_RC( rc );
740     }
741 }
742 
743 const char * ACC3 = "SRR543323";
744 const char * ACC3_REF = "NC_000077.5";
745 const uint64_t slice3_start = 0;
746 const uint64_t slice3_len = 121843856;
747 const uint32_t max_depth = 32;
748 
TEST_CASE(Estimator_16)749 TEST_CASE ( Estimator_16 )
750 {
751     std::cout << "Estimator-Test #16 ( RunCoverage, whole reference at once )" << std::endl;
752 
753     struct PileupEstimator * estim;
754     rc_t rc = MakePileupEstimator( &estim, ACC3, 0, NULL, NULL, 0, false );
755     REQUIRE_RC( rc );
756     if ( rc == 0 )
757     {
758         uint32_t * coverage1 = ( uint32_t * )calloc( slice3_len, sizeof * coverage1 );
759         uint32_t * coverage2 = ( uint32_t * )calloc( slice3_len, sizeof * coverage2 );
760         uint32_t depths1[ max_depth ];
761         uint32_t depths2[ max_depth ];
762 
763         memset( depths1, 0, sizeof depths1 );
764         memset( depths2, 0, sizeof depths2 );
765 
766         String rname;
767         StringInitCString( &rname, ACC3_REF );
768 
769         rc = RunCoverage( estim, &rname, slice3_start, slice3_len, coverage1 );
770         REQUIRE_RC( rc );
771 
772         rc = calc_coverage_using_ref_iter( ACC3, ACC3_REF, slice3_start, slice3_len, coverage2 );
773         REQUIRE_RC( rc );
774 
775         uint32_t differences1 = 0;
776         for ( uint32_t pos = 0; pos < slice3_len; pos++ )
777         {
778             if ( coverage1[ pos ] != coverage2[ pos ] )
779             {
780                 std::cout << ( slice3_start + pos ) << " : " << coverage1[ pos ] << " , " << coverage2[ pos ] << std::endl;
781                 differences1++;
782             }
783 
784             if ( coverage1[ pos ] >= max_depth )
785                 depths1[ max_depth - 1 ] += 1;
786             else
787                 depths1[ coverage1[ pos ] ] += 1;
788 
789             if ( coverage2[ pos ] >= max_depth )
790                 depths2[ max_depth - 1 ] += 1;
791             else
792                 depths2[ coverage2[ pos ] ] += 1;
793 
794         }
795 
796         uint32_t differences2 = 0;
797         for ( uint32_t pos = 0; pos < max_depth; pos++ )
798         {
799             if ( depths1[ pos ] != depths2[ pos ] )
800             {
801                 std::cout << pos << " : " << depths1[ pos ] << " , " << depths2[ pos ] << std::endl;
802                 differences2++;
803             }
804         }
805 
806         REQUIRE_EQUAL( differences1, (uint32_t)0 );
807         REQUIRE_EQUAL( differences2, (uint32_t)0 );
808 
809         rc = ReleasePileupEstimator( estim );
810         REQUIRE_RC( rc );
811 
812         free( ( void * ) coverage1 );
813         free( ( void * ) coverage2 );
814     }
815 }
816 
817 
TEST_CASE(Estimator_17)818 TEST_CASE ( Estimator_17 )
819 {
820     std::cout << "Estimator-Test #17 ( RunCoverage vs ngs-pileup )" << std::endl;
821 
822     struct PileupEstimator * estim;
823     rc_t rc = MakePileupEstimator( &estim, ACC3, 0, NULL, NULL, 0, true );
824     REQUIRE_RC( rc );
825     if ( rc == 0 )
826     {
827         uint32_t * coverage1 = ( uint32_t * )calloc( slice3_len, sizeof * coverage1 );
828         uint32_t * coverage2 = ( uint32_t * )calloc( slice3_len, sizeof * coverage2 );
829         uint32_t depths1[ max_depth ];
830         uint32_t depths2[ max_depth ];
831 
832         memset( depths1, 0, sizeof depths1 );
833         memset( depths2, 0, sizeof depths2 );
834 
835         String rname;
836         StringInitCString( &rname, ACC3_REF );
837 
838         rc = RunCoverage( estim, &rname, slice3_start, slice3_len, coverage1 );
839         REQUIRE_RC( rc );
840 
841         calc_coverage_using_ngs( ACC3, ACC3_REF, slice3_start, slice3_len, coverage2 );
842 
843         uint32_t differences1 = 0;
844         for ( uint32_t pos = 0; pos < slice3_len; pos++ )
845         {
846             if ( coverage1[ pos ] != coverage2[ pos ] )
847             {
848                 std::cout << ( slice3_start + pos ) << " : " << coverage1[ pos ] << " , " << coverage2[ pos ] << std::endl;
849                 differences1++;
850             }
851 
852             if ( coverage1[ pos ] >= max_depth )
853                 depths1[ max_depth - 1 ] += 1;
854             else
855                 depths1[ coverage1[ pos ] ] += 1;
856 
857             if ( coverage2[ pos ] >= max_depth )
858                 depths2[ max_depth - 1 ] += 1;
859             else
860                 depths2[ coverage2[ pos ] ] += 1;
861 
862         }
863 
864         uint32_t differences2 = 0;
865         for ( uint32_t pos = 0; pos < max_depth; pos++ )
866         {
867             if ( depths1[ pos ] != depths2[ pos ] )
868             {
869                 std::cout << pos << " : " << depths1[ pos ] << " , " << depths2[ pos ] << std::endl;
870                 differences2++;
871             }
872         }
873 
874         REQUIRE_EQUAL( differences1, (uint32_t)0 );
875         REQUIRE_EQUAL( differences2, (uint32_t)0 );
876 
877         rc = ReleasePileupEstimator( estim );
878         REQUIRE_RC( rc );
879 
880         free( ( void * ) coverage1 );
881         free( ( void * ) coverage2 );
882     }
883 
884 }
885 
886 
TEST_CASE(Estimator_18)887 TEST_CASE ( Estimator_18 )
888 {
889     std::cout << "Estimator-Test #18 ( loop through the references )" << std::endl;
890     struct PileupEstimator * estim;
891     rc_t rc = MakePileupEstimator( &estim, ACC3, 0, NULL, NULL, 0, false );
892     REQUIRE_RC( rc );
893 
894     const ReferenceList *reflist;
895     const VDBManager *mgr;
896     rc = VDBManagerMakeRead( &mgr, NULL );
897     REQUIRE_RC( rc );
898 
899     rc = ReferenceList_MakePath( &reflist, mgr, ACC3, ereferencelist_usePrimaryIds, 0, NULL, 0 );
900     REQUIRE_RC( rc );
901 
902     uint32_t count1, count2;
903     rc = EstimatorRefCount( estim, &count1 );
904     REQUIRE_RC( rc );
905 
906     rc = ReferenceList_Count( reflist, &count2 );
907     REQUIRE_RC( rc );
908 
909     REQUIRE_EQUAL( count1, count2 );
910     std::cout << "count1 : " << count1 << "  count2 : " << count2 << std::endl;
911 
912     for ( uint32_t idx = 0; rc == 0 && idx < count1; ++idx )
913     {
914         String refname;
915         uint64_t reflen;
916 
917         rc = EstimatorRefInfo( estim, idx, &refname, &reflen );
918         REQUIRE_RC( rc );
919 
920         const ReferenceObj *refobj;
921         rc = ReferenceList_Get( reflist, &refobj, idx );
922         REQUIRE_RC( rc );
923 
924         const char * seqid;
925         rc = ReferenceObj_SeqId( refobj, &seqid );
926         REQUIRE_RC( rc );
927 
928         String SeqId;
929         StringInitCString( &SeqId, seqid );
930 
931         int cmp = StringCompare( &refname, &SeqId );
932         REQUIRE_EQUAL( cmp, (int)0 );
933 
934         INSDC_coord_len seqlen;
935         rc = ReferenceObj_SeqLength( refobj, &seqlen );
936         REQUIRE_RC( rc );
937         REQUIRE_EQUAL( (uint64_t)seqlen, reflen );
938 
939         ReferenceObj_Release( refobj );
940         //std::cout << " [" << idx << "] : " << refname.addr << " . " << reflen << std::endl;
941     }
942 
943     ReferenceList_Release( reflist ); // has no return-value!
944     rc = VDBManagerRelease( mgr );
945     REQUIRE_RC( rc );
946 
947     rc = ReleasePileupEstimator( estim );
948     REQUIRE_RC( rc );
949 }
950 
951 
952 //////////////////////////////////////////// Main
953 #include <kapp/args.h>
954 #include <klib/out.h>
955 #include <kfg/config.h>
956 
957 extern "C"
958 {
959 
KAppVersion(void)960 ver_t CC KAppVersion ( void )
961 {
962     return 0x1000000;
963 }
964 
965 const char UsageDefaultName[] = "test-estimator";
966 
UsageSummary(const char * progname)967 rc_t CC UsageSummary (const char * progname)
968 {
969     return KOutMsg ( "Usage:\n" "\t%s [options]\n\n", progname );
970 }
971 
Usage(const Args * args)972 rc_t CC Usage( const Args* args )
973 {
974     return 0;
975 }
976 
KMain(int argc,char * argv[])977 rc_t CC KMain ( int argc, char *argv [] )
978 {
979     return PileupEstimatorTestSuite( argc, argv );
980 }
981 
982 }
983 
984