1 /*===========================================================================
2 *
3 * PUBLIC DOMAIN NOTICE
4 * National Center for Biotechnology Information
5 *
6 * This software/database is a "United States Government Work" under the
7 * terms of the United States Copyright Act. It was written as part of
8 * the author's official duties as a United States Government employee and
9 * thus cannot be copyrighted. This software/database is freely available
10 * to the public for use. The National Library of Medicine and the U.S.
11 * Government have not placed any restriction on its use or reproduction.
12 *
13 * Although all reasonable efforts have been taken to ensure the accuracy
14 * and reliability of the software and data, the NLM and the U.S.
15 * Government do not and cannot warrant the performance or results that
16 * may be obtained by using this software or data. The NLM and the U.S.
17 * Government disclaim all warranties, express or implied, including
18 * warranties of performance, merchantability or fitness for any particular
19 * purpose.
20 *
21 * Please cite the author in any work or product based on this material.
22 *
23 * ===========================================================================
24 *
25 */
26
27 /**
28 * Unit tests for the pileup-estimator
29 */
30
31 #include <limits> /* for std::numeric_limits<uint64_t>::max() in test #14 */
32
33 #include <ktst/unit_test.hpp>
34
35 #include <stdexcept>
36 #include <string>
37
38 #include <klib/rc.h>
39 #include <klib/text.h>
40
41 #include <vdb/manager.h>
42 #include <vdb/database.h>
43 #include <vdb/table.h>
44 #include <vdb/cursor.h>
45
46 #include <align/reference.h>
47 #include <align/manager.h>
48 #include <align/iterator.h>
49
50 #include <align/unsupported_pileup_estimator.h>
51
52 #include <ngs/ncbi/NGS.hpp> // openReadCollection
53 #include <ngs/ErrorMsg.hpp>
54 #include <ngs/ReadCollection.hpp>
55 #include <ngs/Reference.hpp>
56 #include <ngs/Alignment.hpp>
57 #include <ngs/PileupIterator.hpp>
58
59 using namespace std;
60
calc_coverage_sum_using_ref_iter(const char * acc,const char * refname,uint64_t slice_start,uint32_t slice_len,uint64_t * result)61 static rc_t calc_coverage_sum_using_ref_iter( const char * acc, const char * refname,
62 uint64_t slice_start, uint32_t slice_len, uint64_t * result )
63 {
64 const VDBManager *mgr;
65 rc_t rc = VDBManagerMakeRead( &mgr, NULL );
66 if ( rc == 0 )
67 {
68 const VDatabase *db;
69 rc = VDBManagerOpenDBRead( mgr, &db, NULL, "%s", acc );
70 if ( rc == 0 )
71 {
72 const VTable * tbl;
73 rc = VDatabaseOpenTableRead( db, &tbl, "%s", "PRIMARY_ALIGNMENT" );
74 if ( rc == 0 )
75 {
76 const VCursor * prim_curs;
77 rc = VTableCreateCursorRead( tbl, &prim_curs );
78 if ( rc == 0 )
79 {
80 const ReferenceList *ref_list;
81 uint32_t reflist_options = ereferencelist_usePrimaryIds;
82 rc = ReferenceList_MakeDatabase( &ref_list, db, reflist_options, 0, NULL, 0 ); /* align/reference.h */
83 if ( rc == 0 )
84 {
85 const ReferenceObj * ref_obj;
86 rc = ReferenceList_Find( ref_list, &ref_obj, refname, string_size( refname ) ); /* align/reference.h */
87 {
88 const AlignMgr * a_mgr;
89 rc = AlignMgrMakeRead( &a_mgr ); /* align/manager.h */
90 if ( rc == 0 )
91 {
92 ReferenceIterator * ref_iter;
93 rc = AlignMgrMakeReferenceIterator( a_mgr, &ref_iter, NULL, 0 );
94 if ( rc == 0 )
95 {
96 rc = ReferenceIteratorAddPlacements( ref_iter, /* the outer ref-iter */
97 ref_obj, /* the ref-obj for this chromosome */
98 slice_start, /* start ( zero-based ) */
99 slice_len, /* length */
100 NULL, /* ref-cursor */
101 prim_curs, /* align-cursor */
102 primary_align_ids, /* which id's */
103 NULL, /* what read-group */
104 NULL ); /* placement-context */
105 while ( rc == 0 )
106 {
107 rc = ReferenceIteratorNextReference( ref_iter, NULL, NULL, NULL );
108 rc_t rc1 = 0;
109 while ( rc == 0 && rc1 == 0 )
110 {
111 INSDC_coord_zero first_pos;
112 INSDC_coord_len len;
113 rc1 = ReferenceIteratorNextWindow( ref_iter, &first_pos, &len );
114 rc_t rc2 = 0;
115 while ( rc1 == 0 && rc2 == 0 )
116 {
117 rc2 = ReferenceIteratorNextPos( ref_iter, true ); /* do skip empty positions */
118 if ( rc2 == 0 )
119 {
120 uint32_t depth;
121 rc2 = ReferenceIteratorPosition( ref_iter, NULL, &depth, NULL );
122 if ( rc2 == 0 )
123 *result += depth;
124 }
125 }
126 }
127 }
128 if ( GetRCState( rc ) == rcDone ) rc = 0;
129 ReferenceIteratorRelease( ref_iter );
130 }
131 AlignMgrRelease( a_mgr );
132 }
133 ReferenceObj_Release( ref_obj );
134 }
135 ReferenceList_Release( ref_list );
136 }
137 VCursorRelease( prim_curs );
138 }
139 VTableRelease ( tbl );
140 }
141 VDatabaseRelease( db );
142 }
143 VDBManagerRelease( mgr );
144 }
145 return rc;
146 }
147
calc_coverage_using_ref_iter(const char * acc,const char * refname,uint64_t slice_start,uint32_t slice_len,uint32_t * coverage)148 static rc_t calc_coverage_using_ref_iter( const char * acc, const char * refname,
149 uint64_t slice_start, uint32_t slice_len, uint32_t * coverage )
150 {
151 const VDBManager *mgr;
152 rc_t rc = VDBManagerMakeRead( &mgr, NULL );
153 if ( rc == 0 )
154 {
155 const VDatabase *db;
156 rc = VDBManagerOpenDBRead( mgr, &db, NULL, "%s", acc );
157 if ( rc == 0 )
158 {
159 const VTable * tbl;
160 rc = VDatabaseOpenTableRead( db, &tbl, "%s", "PRIMARY_ALIGNMENT" );
161 if ( rc == 0 )
162 {
163 const VCursor * prim_curs;
164 rc = VTableCreateCursorRead( tbl, &prim_curs );
165 if ( rc == 0 )
166 {
167 const ReferenceList *ref_list;
168 uint32_t reflist_options = ereferencelist_usePrimaryIds;
169 rc = ReferenceList_MakeDatabase( &ref_list, db, reflist_options, 0, NULL, 0 ); /* align/reference.h */
170 if ( rc == 0 )
171 {
172 const ReferenceObj * ref_obj;
173 rc = ReferenceList_Find( ref_list, &ref_obj, refname, string_size( refname ) ); /* align/reference.h */
174 {
175 const AlignMgr * a_mgr;
176 rc = AlignMgrMakeRead( &a_mgr ); /* align/manager.h */
177 if ( rc == 0 )
178 {
179 ReferenceIterator * ref_iter;
180 rc = AlignMgrMakeReferenceIterator( a_mgr, &ref_iter, NULL, 0 );
181 if ( rc == 0 )
182 {
183 memset( coverage, 0, slice_len * ( sizeof *coverage ) );
184 rc = ReferenceIteratorAddPlacements( ref_iter, /* the outer ref-iter */
185 ref_obj, /* the ref-obj for this chromosome */
186 slice_start, /* start ( zero-based ) */
187 slice_len, /* length */
188 NULL, /* ref-cursor */
189 prim_curs, /* align-cursor */
190 primary_align_ids, /* which id's */
191 NULL, /* what read-group */
192 NULL ); /* placement-context */
193 while ( rc == 0 )
194 {
195 rc = ReferenceIteratorNextReference( ref_iter, NULL, NULL, NULL );
196 rc_t rc1 = 0;
197 while ( rc == 0 && rc1 == 0 )
198 {
199 INSDC_coord_zero first_pos;
200 INSDC_coord_len len;
201 rc1 = ReferenceIteratorNextWindow( ref_iter, &first_pos, &len );
202 rc_t rc2 = 0;
203 while ( rc1 == 0 && rc2 == 0 )
204 {
205 rc2 = ReferenceIteratorNextPos( ref_iter, true ); /* do skip empty positions */
206 if ( rc2 == 0 )
207 {
208 uint32_t depth;
209 INSDC_coord_zero this_pos;
210 rc2 = ReferenceIteratorPosition( ref_iter, &this_pos, &depth, NULL );
211 if ( rc == 0 )
212 {
213 int64_t ofs = ( this_pos - slice_start );
214 coverage[ ofs ] = depth;
215 }
216 }
217 }
218 }
219 }
220 if ( GetRCState( rc ) == rcDone ) rc = 0;
221 ReferenceIteratorRelease( ref_iter );
222 }
223 AlignMgrRelease( a_mgr );
224 }
225 ReferenceObj_Release( ref_obj );
226 }
227 ReferenceList_Release( ref_list );
228 }
229 VCursorRelease( prim_curs );
230 }
231 VTableRelease ( tbl );
232 }
233 VDatabaseRelease( db );
234 }
235 VDBManagerRelease( mgr );
236 }
237 return rc;
238 }
239
240
calc_coverage_using_ngs(ngs::String acc,ngs::String refname,uint64_t slice_start,uint32_t slice_len,uint32_t * coverage)241 static void calc_coverage_using_ngs( ngs::String acc, ngs::String refname,
242 uint64_t slice_start, uint32_t slice_len, uint32_t * coverage )
243 {
244 try
245 {
246 ngs::ReadCollection run( ncbi::NGS::openReadCollection( acc ) );
247 ngs::Reference ref = run.getReference ( refname );
248 ngs::PileupIterator it = ref.getPileupSlice( slice_start, slice_len );
249 while ( it.nextPileup() )
250 {
251 uint32_t depth( it.getPileupDepth() );
252 if ( depth > 0 )
253 {
254 uint64_t pos( it.getReferencePosition() );
255 int64_t ofs = ( pos - slice_start );
256 if ( ofs >= 0 && ofs < slice_len )
257 coverage[ ofs ] = depth;
258 }
259 }
260 }
261
262 catch ( ngs::ErrorMsg & e ) {
263 cerr << "Error: " << e . toString () << endl;
264 }
265 catch ( std::exception & e ) {
266 cerr << "Error: " << e . what () << endl;
267 }
268 catch ( ... ) {
269 cerr << "Error: unknown exception" << endl;
270 }
271 }
272
273 const char * ACC1 = "SRR341578";
274 const char * ACC1_REF = "NC_011748.1";
275 const uint64_t slice1_start = 3000002;
276 const uint32_t slice1_len = 200;
277
278
279 TEST_SUITE( PileupEstimatorTestSuite );
280
TEST_CASE(Estimator_1)281 TEST_CASE ( Estimator_1 )
282 {
283 std::cout << "Estimator-Test #1 ( estimator vs. ref_iter ) " << std::endl;
284
285 uint64_t res1 = 0;
286
287 struct PileupEstimator * estim;
288 rc_t rc = MakePileupEstimator( &estim, ACC1, 0, NULL, NULL, 0, false );
289 REQUIRE_RC( rc );
290 if ( rc == 0 )
291 {
292 String rname;
293 StringInitCString( &rname, ACC1_REF );
294
295 rc = RunPileupEstimator( estim, &rname, slice1_start, slice1_len, &res1 );
296 REQUIRE_RC( rc );
297
298 //std::cout << "result ( using RunPileupEstimator ) : " << res1 << std::endl;
299
300 rc = ReleasePileupEstimator( estim );
301 REQUIRE_RC( rc );
302 }
303
304 uint64_t res2 = 0;
305
306 rc = calc_coverage_sum_using_ref_iter( ACC1, ACC1_REF, slice1_start, slice1_len, &res2 );
307 REQUIRE_RC( rc );
308
309 //std::cout << "result ( using ReferenceIterator ) : " << res2 << std::endl;
310
311 REQUIRE_EQUAL( res1, res2 );
312 }
313
TEST_CASE(Estimator_2)314 TEST_CASE ( Estimator_2 )
315 {
316 std::cout << "Estimator-Test #2 ( no sources and no cursors )" << std::endl;
317
318 // MakePileupEstimator has to fail when source and the cursors are NULL
319 struct PileupEstimator * estim;
320 rc_t rc = MakePileupEstimator( &estim, NULL, 0, NULL, NULL, 0, false );
321 REQUIRE_RC_FAIL( rc );
322 }
323
TEST_CASE(Estimator_3)324 TEST_CASE ( Estimator_3 )
325 {
326 std::cout << "Estimator-Test #3 ( no self-ptr )" << std::endl;
327
328 // MakePileupEstimator has to fail when given a NULL-ptr as *self
329 rc_t rc = MakePileupEstimator( NULL, ACC1, 0, NULL, NULL, 0, false );
330 REQUIRE_RC_FAIL( rc );
331 }
332
make_cursor(const VDatabase * db,const VCursor ** curs,const char * tbl_name,uint32_t count,...)333 static rc_t make_cursor( const VDatabase *db, const VCursor ** curs, const char * tbl_name, uint32_t count, ... )
334 {
335 const VTable * tbl;
336 rc_t rc = VDatabaseOpenTableRead( db, &tbl, "%s", tbl_name );
337 if ( rc == 0 )
338 {
339 rc = VTableCreateCursorRead( tbl, curs );
340 if ( rc == 0 )
341 {
342 uint32_t i;
343 va_list args;
344 va_start( args, count );
345 for ( i = 0; rc == 0 && i < count; i++ )
346 {
347 const char * colname = va_arg( args, const char * );
348 uint32_t idx;
349 rc = VCursorAddColumn( *curs, &idx, colname );
350 }
351 va_end( args );
352 if ( rc == 0 )
353 rc = VCursorOpen( *curs );
354 }
355 VTableRelease( tbl );
356 }
357 return rc;
358 }
359
360
TEST_CASE(Estimator_4)361 TEST_CASE ( Estimator_4 )
362 {
363 std::cout << "Estimator-Test #4 ( with 2 cursors provided )" << std::endl;
364 const VDBManager *mgr;
365 rc_t rc = VDBManagerMakeRead( &mgr, NULL );
366 REQUIRE_RC( rc );
367 if ( rc == 0 )
368 {
369 const VDatabase *db;
370 rc = VDBManagerOpenDBRead( mgr, &db, NULL, "%s", ACC1 );
371 REQUIRE_RC( rc );
372 if ( rc == 0 )
373 {
374 const VCursor * prim_curs;
375 rc = make_cursor( db, &prim_curs, "PRIMARY_ALIGNMENT", 2, "REF_POS", "REF_LEN" );
376 REQUIRE_RC( rc );
377 if ( rc == 0 )
378 {
379 const VCursor * ref_curs;
380 rc = make_cursor( db, &ref_curs, "REFERENCE", 4, "SEQ_ID", "SEQ_LEN", "MAX_SEQ_LEN", "PRIMARY_ALIGNMENT_IDS" );
381 REQUIRE_RC( rc );
382 if ( rc == 0 )
383 {
384 struct PileupEstimator * estim;
385 rc_t rc = MakePileupEstimator( &estim, NULL, 0, ref_curs, prim_curs, 0, false );
386 REQUIRE_RC( rc );
387 if ( rc == 0 )
388 {
389 uint64_t res = 0;
390
391 String rname;
392 StringInitCString( &rname, ACC1_REF );
393
394 rc = RunPileupEstimator( estim, &rname, slice1_start, slice1_len, &res );
395 REQUIRE_RC( rc );
396
397 //std::cout << "result: " << res << std::endl;
398
399 rc = ReleasePileupEstimator( estim );
400 REQUIRE_RC( rc );
401 }
402 VCursorRelease( ref_curs );
403 }
404 VCursorRelease( prim_curs );
405 }
406 VDatabaseRelease( db );
407 }
408 VDBManagerRelease( mgr );
409 }
410 }
411
412
TEST_CASE(Estimator_5)413 TEST_CASE ( Estimator_5 )
414 {
415 std::cout << "Estimator-Test #5 ( with 2 cursors provided, one column on alignments missing )" << std::endl;
416 const VDBManager *mgr;
417 rc_t rc = VDBManagerMakeRead( &mgr, NULL );
418 REQUIRE_RC( rc );
419 if ( rc == 0 )
420 {
421 const VDatabase *db;
422 rc = VDBManagerOpenDBRead( mgr, &db, NULL, "%s", ACC1 );
423 REQUIRE_RC( rc );
424 if ( rc == 0 )
425 {
426 const VCursor * prim_curs;
427 rc = make_cursor( db, &prim_curs, "PRIMARY_ALIGNMENT", 1, "REF_POS" );
428 REQUIRE_RC( rc );
429 if ( rc == 0 )
430 {
431 const VCursor * ref_curs;
432 rc = make_cursor( db, &ref_curs, "REFERENCE", 4, "SEQ_ID", "SEQ_LEN", "MAX_SEQ_LEN", "PRIMARY_ALIGNMENT_IDS" );
433 REQUIRE_RC( rc );
434 if ( rc == 0 )
435 {
436 struct PileupEstimator * estim;
437 rc_t rc = MakePileupEstimator( &estim, NULL, 0, ref_curs, prim_curs, 0, false );
438 REQUIRE_RC_FAIL( rc );
439 if ( rc == 0 )
440 ReleasePileupEstimator( estim );
441 VCursorRelease( ref_curs );
442 }
443 VCursorRelease( prim_curs );
444 }
445 VDatabaseRelease( db );
446 }
447 VDBManagerRelease( mgr );
448 }
449 }
450
TEST_CASE(Estimator_6)451 TEST_CASE ( Estimator_6 )
452 {
453 std::cout << "Estimator-Test #6 ( with 2 cursors provided, one column on ref missing )" << std::endl;
454 const VDBManager *mgr;
455 rc_t rc = VDBManagerMakeRead( &mgr, NULL );
456 REQUIRE_RC( rc );
457 if ( rc == 0 )
458 {
459 const VDatabase *db;
460 rc = VDBManagerOpenDBRead( mgr, &db, NULL, "%s", ACC1 );
461 REQUIRE_RC( rc );
462 if ( rc == 0 )
463 {
464 const VCursor * prim_curs;
465 rc = make_cursor( db, &prim_curs, "PRIMARY_ALIGNMENT", 2, "REF_POS", "REF_LEN" );
466 REQUIRE_RC( rc );
467 if ( rc == 0 )
468 {
469 const VCursor * ref_curs;
470 rc = make_cursor( db, &ref_curs, "REFERENCE", 3, "SEQ_ID", "SEQ_LEN", "MAX_SEQ_LEN" );
471 REQUIRE_RC( rc );
472 if ( rc == 0 )
473 {
474 struct PileupEstimator * estim;
475 rc_t rc = MakePileupEstimator( &estim, NULL, 0, ref_curs, prim_curs, 0, false );
476 REQUIRE_RC_FAIL( rc );
477 if ( rc == 0 )
478 ReleasePileupEstimator( estim );
479 VCursorRelease( ref_curs );
480 }
481 VCursorRelease( prim_curs );
482 }
483 VDatabaseRelease( db );
484 }
485 VDBManagerRelease( mgr );
486 }
487 }
488
TEST_CASE(Estimator_7)489 TEST_CASE ( Estimator_7 )
490 {
491 std::cout << "Estimator-Test #7 ( with ref_cursor, no prim_cursor provided )" << std::endl;
492 const VDBManager *mgr;
493 rc_t rc = VDBManagerMakeRead( &mgr, NULL );
494 REQUIRE_RC( rc );
495 if ( rc == 0 )
496 {
497 const VDatabase *db;
498 rc = VDBManagerOpenDBRead( mgr, &db, NULL, "%s", ACC1 );
499 REQUIRE_RC( rc );
500 if ( rc == 0 )
501 {
502 const VCursor * ref_curs;
503 rc = make_cursor( db, &ref_curs, "REFERENCE", 4, "SEQ_ID", "SEQ_LEN", "MAX_SEQ_LEN", "PRIMARY_ALIGNMENT_IDS" );
504 REQUIRE_RC( rc );
505 if ( rc == 0 )
506 {
507 struct PileupEstimator * estim;
508 rc_t rc = MakePileupEstimator( &estim, ACC1, 0, ref_curs, NULL, 0, false );
509 REQUIRE_RC( rc );
510 if ( rc == 0 )
511 {
512 uint64_t res = 0;
513
514 String rname;
515 StringInitCString( &rname, ACC1_REF );
516
517 rc = RunPileupEstimator( estim, &rname, slice1_start, slice1_len, &res );
518 REQUIRE_RC( rc );
519
520 //std::cout << "result: " << res << std::endl;
521
522 rc = ReleasePileupEstimator( estim );
523 REQUIRE_RC( rc );
524 }
525 VCursorRelease( ref_curs );
526 }
527 VDatabaseRelease( db );
528 }
529 VDBManagerRelease( mgr );
530 }
531 }
532
TEST_CASE(Estimator_8)533 TEST_CASE ( Estimator_8 )
534 {
535 std::cout << "Estimator-Test #8 ( with prim_cursor, no ref_cursor provided )" << std::endl;
536 const VDBManager *mgr;
537 rc_t rc = VDBManagerMakeRead( &mgr, NULL );
538 REQUIRE_RC( rc );
539 if ( rc == 0 )
540 {
541 const VDatabase *db;
542 rc = VDBManagerOpenDBRead( mgr, &db, NULL, "%s", ACC1 );
543 REQUIRE_RC( rc );
544 if ( rc == 0 )
545 {
546 const VCursor * prim_curs;
547 rc = make_cursor( db, &prim_curs, "PRIMARY_ALIGNMENT", 2, "REF_POS", "REF_LEN" );
548 REQUIRE_RC( rc );
549 if ( rc == 0 )
550 {
551 struct PileupEstimator * estim;
552 rc_t rc = MakePileupEstimator( &estim, ACC1, 0, NULL, prim_curs, 0, false );
553 REQUIRE_RC( rc );
554 if ( rc == 0 )
555 {
556 uint64_t res = 0;
557
558 String rname;
559 StringInitCString( &rname, ACC1_REF );
560
561 rc = RunPileupEstimator( estim, &rname, slice1_start, slice1_len, &res );
562 REQUIRE_RC( rc );
563
564 //std::cout << "result: " << res << std::endl;
565
566 rc = ReleasePileupEstimator( estim );
567 REQUIRE_RC( rc );
568 }
569 VCursorRelease( prim_curs );
570 }
571 VDatabaseRelease( db );
572 }
573 VDBManagerRelease( mgr );
574 }
575 }
576
TEST_CASE(Estimator_9)577 TEST_CASE ( Estimator_9 )
578 {
579 std::cout << "Estimator-Test #9 ( reference-name missing )" << std::endl;
580
581 struct PileupEstimator * estim;
582 rc_t rc = MakePileupEstimator( &estim, ACC1, 0, NULL, NULL, 0, false );
583 REQUIRE_RC( rc );
584 if ( rc == 0 )
585 {
586 uint64_t res = 0;
587
588 rc = RunPileupEstimator( estim, NULL, slice1_start, slice1_len, &res );
589 REQUIRE_RC_FAIL( rc );
590 if ( rc == 0 )
591 ReleasePileupEstimator( estim );
592 }
593 }
594
TEST_CASE(Estimator_10)595 TEST_CASE ( Estimator_10 )
596 {
597 std::cout << "Estimator-Test #10 ( slice-length is zero )" << std::endl;
598
599 struct PileupEstimator * estim;
600 rc_t rc = MakePileupEstimator( &estim, ACC1, 0, NULL, NULL, 0, false );
601 REQUIRE_RC( rc );
602 if ( rc == 0 )
603 {
604 uint64_t res = 0;
605
606 String rname;
607 StringInitCString( &rname, ACC1_REF );
608
609 rc = RunPileupEstimator( estim, &rname, slice1_start, 0, &res );
610 REQUIRE_RC_FAIL( rc );
611 if ( rc == 0 )
612 ReleasePileupEstimator( estim );
613 }
614 }
615
TEST_CASE(Estimator_11)616 TEST_CASE ( Estimator_11 )
617 {
618 std::cout << "Estimator-Test #11 ( slice-start beyond end of reference )" << std::endl;
619
620 struct PileupEstimator * estim;
621 rc_t rc = MakePileupEstimator( &estim, ACC1, 0, NULL, NULL, 0, false );
622 REQUIRE_RC( rc );
623 if ( rc == 0 )
624 {
625 uint64_t res = 0;
626
627 String rname;
628 StringInitCString( &rname, ACC1_REF );
629
630 rc = RunPileupEstimator( estim, &rname, slice1_start * 2, slice1_len, &res );
631 REQUIRE_RC_FAIL( rc );
632 if ( rc == 0 )
633 ReleasePileupEstimator( estim );
634 }
635 }
636
TEST_CASE(Estimator_12)637 TEST_CASE ( Estimator_12 )
638 {
639 std::cout << "Estimator-Test #12 ( slice-length beyond end of reference )" << std::endl;
640
641 struct PileupEstimator * estim;
642 rc_t rc = MakePileupEstimator( &estim, ACC1, 0, NULL, NULL, 0, false );
643 REQUIRE_RC( rc );
644 if ( rc == 0 )
645 {
646 uint64_t res = 0;
647
648 String rname;
649 StringInitCString( &rname, ACC1_REF );
650
651 rc = RunPileupEstimator( estim, &rname, slice1_start, slice1_len * 100000, &res );
652 REQUIRE_RC_FAIL( rc );
653 if ( rc == 0 )
654 ReleasePileupEstimator( estim );
655 }
656 }
657
TEST_CASE(Estimator_13)658 TEST_CASE ( Estimator_13 )
659 {
660 std::cout << "Estimator-Test #13 ( result-ptr missing )" << std::endl;
661
662 struct PileupEstimator * estim;
663 rc_t rc = MakePileupEstimator( &estim, ACC1, 0, NULL, NULL, 0, false );
664 REQUIRE_RC( rc );
665 if ( rc == 0 )
666 {
667 String rname;
668 StringInitCString( &rname, ACC1_REF );
669
670 rc = RunPileupEstimator( estim, &rname, slice1_start, slice1_len, NULL );
671 REQUIRE_RC_FAIL( rc );
672 if ( rc == 0 )
673 ReleasePileupEstimator( estim );
674 }
675 }
676
677 const char * ACC2 = "SRR5450996";
678 const char * ACC2_REF = "NC_000068.7";
679 const uint64_t slice2_start = 98662227;
680 const uint32_t slice2_len = 1000;
681
TEST_CASE(Estimator_14)682 TEST_CASE ( Estimator_14 )
683 {
684 std::cout << "Estimator-Test #14 ( cutoff-value on expensive accession ) " << std::endl;
685
686 struct PileupEstimator * estim;
687 rc_t rc = MakePileupEstimator( &estim, ACC2, 0, NULL, NULL, 1000000, false );
688 REQUIRE_RC( rc );
689 if ( rc == 0 )
690 {
691 uint64_t res = 0;
692
693 String rname;
694 StringInitCString( &rname, ACC2_REF );
695
696 rc = RunPileupEstimator( estim, &rname, slice2_start, slice2_len, &res );
697 REQUIRE_RC( rc );
698
699 //std::cout << "result: " << res << std::endl;
700
701 REQUIRE_EQUAL( res, std::numeric_limits<uint64_t>::max() );
702
703 rc = ReleasePileupEstimator( estim );
704 REQUIRE_RC( rc );
705 }
706 }
707
TEST_CASE(Estimator_15)708 TEST_CASE ( Estimator_15 )
709 {
710 std::cout << "Estimator-Test #15 ( multiple invocations with different slices )" << std::endl;
711
712 struct PileupEstimator * estim;
713 rc_t rc = MakePileupEstimator( &estim, ACC1, 0, NULL, NULL, 0, false );
714 REQUIRE_RC( rc );
715 if ( rc == 0 )
716 {
717 uint64_t res = 0;
718
719 String rname;
720 StringInitCString( &rname, ACC1_REF );
721
722 rc = RunPileupEstimator( estim, &rname, slice1_start, slice1_len, &res );
723 REQUIRE_RC( rc );
724 //std::cout << "result: " << res << std::endl;
725
726 rc = RunPileupEstimator( estim, &rname, slice1_start + 100, slice1_len * 10, &res );
727 REQUIRE_RC( rc );
728 //std::cout << "result: " << res << std::endl;
729
730 rc = RunPileupEstimator( estim, &rname, slice1_start + 1000, slice1_len * 100, &res );
731 REQUIRE_RC( rc );
732 //std::cout << "result: " << res << std::endl;
733
734 rc = RunPileupEstimator( estim, &rname, slice1_start + 6000, slice1_len, &res );
735 REQUIRE_RC( rc );
736 //std::cout << "result: " << res << std::endl;
737
738 rc = ReleasePileupEstimator( estim );
739 REQUIRE_RC( rc );
740 }
741 }
742
743 const char * ACC3 = "SRR543323";
744 const char * ACC3_REF = "NC_000077.5";
745 const uint64_t slice3_start = 0;
746 const uint64_t slice3_len = 121843856;
747 const uint32_t max_depth = 32;
748
TEST_CASE(Estimator_16)749 TEST_CASE ( Estimator_16 )
750 {
751 std::cout << "Estimator-Test #16 ( RunCoverage, whole reference at once )" << std::endl;
752
753 struct PileupEstimator * estim;
754 rc_t rc = MakePileupEstimator( &estim, ACC3, 0, NULL, NULL, 0, false );
755 REQUIRE_RC( rc );
756 if ( rc == 0 )
757 {
758 uint32_t * coverage1 = ( uint32_t * )calloc( slice3_len, sizeof * coverage1 );
759 uint32_t * coverage2 = ( uint32_t * )calloc( slice3_len, sizeof * coverage2 );
760 uint32_t depths1[ max_depth ];
761 uint32_t depths2[ max_depth ];
762
763 memset( depths1, 0, sizeof depths1 );
764 memset( depths2, 0, sizeof depths2 );
765
766 String rname;
767 StringInitCString( &rname, ACC3_REF );
768
769 rc = RunCoverage( estim, &rname, slice3_start, slice3_len, coverage1 );
770 REQUIRE_RC( rc );
771
772 rc = calc_coverage_using_ref_iter( ACC3, ACC3_REF, slice3_start, slice3_len, coverage2 );
773 REQUIRE_RC( rc );
774
775 uint32_t differences1 = 0;
776 for ( uint32_t pos = 0; pos < slice3_len; pos++ )
777 {
778 if ( coverage1[ pos ] != coverage2[ pos ] )
779 {
780 std::cout << ( slice3_start + pos ) << " : " << coverage1[ pos ] << " , " << coverage2[ pos ] << std::endl;
781 differences1++;
782 }
783
784 if ( coverage1[ pos ] >= max_depth )
785 depths1[ max_depth - 1 ] += 1;
786 else
787 depths1[ coverage1[ pos ] ] += 1;
788
789 if ( coverage2[ pos ] >= max_depth )
790 depths2[ max_depth - 1 ] += 1;
791 else
792 depths2[ coverage2[ pos ] ] += 1;
793
794 }
795
796 uint32_t differences2 = 0;
797 for ( uint32_t pos = 0; pos < max_depth; pos++ )
798 {
799 if ( depths1[ pos ] != depths2[ pos ] )
800 {
801 std::cout << pos << " : " << depths1[ pos ] << " , " << depths2[ pos ] << std::endl;
802 differences2++;
803 }
804 }
805
806 REQUIRE_EQUAL( differences1, (uint32_t)0 );
807 REQUIRE_EQUAL( differences2, (uint32_t)0 );
808
809 rc = ReleasePileupEstimator( estim );
810 REQUIRE_RC( rc );
811
812 free( ( void * ) coverage1 );
813 free( ( void * ) coverage2 );
814 }
815 }
816
817
TEST_CASE(Estimator_17)818 TEST_CASE ( Estimator_17 )
819 {
820 std::cout << "Estimator-Test #17 ( RunCoverage vs ngs-pileup )" << std::endl;
821
822 struct PileupEstimator * estim;
823 rc_t rc = MakePileupEstimator( &estim, ACC3, 0, NULL, NULL, 0, true );
824 REQUIRE_RC( rc );
825 if ( rc == 0 )
826 {
827 uint32_t * coverage1 = ( uint32_t * )calloc( slice3_len, sizeof * coverage1 );
828 uint32_t * coverage2 = ( uint32_t * )calloc( slice3_len, sizeof * coverage2 );
829 uint32_t depths1[ max_depth ];
830 uint32_t depths2[ max_depth ];
831
832 memset( depths1, 0, sizeof depths1 );
833 memset( depths2, 0, sizeof depths2 );
834
835 String rname;
836 StringInitCString( &rname, ACC3_REF );
837
838 rc = RunCoverage( estim, &rname, slice3_start, slice3_len, coverage1 );
839 REQUIRE_RC( rc );
840
841 calc_coverage_using_ngs( ACC3, ACC3_REF, slice3_start, slice3_len, coverage2 );
842
843 uint32_t differences1 = 0;
844 for ( uint32_t pos = 0; pos < slice3_len; pos++ )
845 {
846 if ( coverage1[ pos ] != coverage2[ pos ] )
847 {
848 std::cout << ( slice3_start + pos ) << " : " << coverage1[ pos ] << " , " << coverage2[ pos ] << std::endl;
849 differences1++;
850 }
851
852 if ( coverage1[ pos ] >= max_depth )
853 depths1[ max_depth - 1 ] += 1;
854 else
855 depths1[ coverage1[ pos ] ] += 1;
856
857 if ( coverage2[ pos ] >= max_depth )
858 depths2[ max_depth - 1 ] += 1;
859 else
860 depths2[ coverage2[ pos ] ] += 1;
861
862 }
863
864 uint32_t differences2 = 0;
865 for ( uint32_t pos = 0; pos < max_depth; pos++ )
866 {
867 if ( depths1[ pos ] != depths2[ pos ] )
868 {
869 std::cout << pos << " : " << depths1[ pos ] << " , " << depths2[ pos ] << std::endl;
870 differences2++;
871 }
872 }
873
874 REQUIRE_EQUAL( differences1, (uint32_t)0 );
875 REQUIRE_EQUAL( differences2, (uint32_t)0 );
876
877 rc = ReleasePileupEstimator( estim );
878 REQUIRE_RC( rc );
879
880 free( ( void * ) coverage1 );
881 free( ( void * ) coverage2 );
882 }
883
884 }
885
886
TEST_CASE(Estimator_18)887 TEST_CASE ( Estimator_18 )
888 {
889 std::cout << "Estimator-Test #18 ( loop through the references )" << std::endl;
890 struct PileupEstimator * estim;
891 rc_t rc = MakePileupEstimator( &estim, ACC3, 0, NULL, NULL, 0, false );
892 REQUIRE_RC( rc );
893
894 const ReferenceList *reflist;
895 const VDBManager *mgr;
896 rc = VDBManagerMakeRead( &mgr, NULL );
897 REQUIRE_RC( rc );
898
899 rc = ReferenceList_MakePath( &reflist, mgr, ACC3, ereferencelist_usePrimaryIds, 0, NULL, 0 );
900 REQUIRE_RC( rc );
901
902 uint32_t count1, count2;
903 rc = EstimatorRefCount( estim, &count1 );
904 REQUIRE_RC( rc );
905
906 rc = ReferenceList_Count( reflist, &count2 );
907 REQUIRE_RC( rc );
908
909 REQUIRE_EQUAL( count1, count2 );
910 std::cout << "count1 : " << count1 << " count2 : " << count2 << std::endl;
911
912 for ( uint32_t idx = 0; rc == 0 && idx < count1; ++idx )
913 {
914 String refname;
915 uint64_t reflen;
916
917 rc = EstimatorRefInfo( estim, idx, &refname, &reflen );
918 REQUIRE_RC( rc );
919
920 const ReferenceObj *refobj;
921 rc = ReferenceList_Get( reflist, &refobj, idx );
922 REQUIRE_RC( rc );
923
924 const char * seqid;
925 rc = ReferenceObj_SeqId( refobj, &seqid );
926 REQUIRE_RC( rc );
927
928 String SeqId;
929 StringInitCString( &SeqId, seqid );
930
931 int cmp = StringCompare( &refname, &SeqId );
932 REQUIRE_EQUAL( cmp, (int)0 );
933
934 INSDC_coord_len seqlen;
935 rc = ReferenceObj_SeqLength( refobj, &seqlen );
936 REQUIRE_RC( rc );
937 REQUIRE_EQUAL( (uint64_t)seqlen, reflen );
938
939 ReferenceObj_Release( refobj );
940 //std::cout << " [" << idx << "] : " << refname.addr << " . " << reflen << std::endl;
941 }
942
943 ReferenceList_Release( reflist ); // has no return-value!
944 rc = VDBManagerRelease( mgr );
945 REQUIRE_RC( rc );
946
947 rc = ReleasePileupEstimator( estim );
948 REQUIRE_RC( rc );
949 }
950
951
952 //////////////////////////////////////////// Main
953 #include <kapp/args.h>
954 #include <klib/out.h>
955 #include <kfg/config.h>
956
957 extern "C"
958 {
959
KAppVersion(void)960 ver_t CC KAppVersion ( void )
961 {
962 return 0x1000000;
963 }
964
965 const char UsageDefaultName[] = "test-estimator";
966
UsageSummary(const char * progname)967 rc_t CC UsageSummary (const char * progname)
968 {
969 return KOutMsg ( "Usage:\n" "\t%s [options]\n\n", progname );
970 }
971
Usage(const Args * args)972 rc_t CC Usage( const Args* args )
973 {
974 return 0;
975 }
976
KMain(int argc,char * argv[])977 rc_t CC KMain ( int argc, char *argv [] )
978 {
979 return PileupEstimatorTestSuite( argc, argv );
980 }
981
982 }
983
984