1 /*===========================================================================
2 *
3 *                            PUBLIC DOMAIN NOTICE
4 *               National Center for Biotechnology Information
5 *
6 *  This software/database is a "United States Government Work" under the
7 *  terms of the United States Copyright Act.  It was written as part of
8 *  the author's official duties as a United States Government employee and
9 *  thus cannot be copyrighted.  This software/database is freely available
10 *  to the public for use. The National Library of Medicine and the U.S.
11 *  Government have not placed any restriction on its use or reproduction.
12 *
13 *  Although all reasonable efforts have been taken to ensure the accuracy
14 *  and reliability of the software and data, the NLM and the U.S.
15 *  Government do not and cannot warrant the performance or results that
16 *  may be obtained by using this software or data. The NLM and the U.S.
17 *  Government disclaim all warranties, express or implied, including
18 *  warranties of performance, merchantability or fitness for any particular
19 *  purpose.
20 *
21 *  Please cite the author in any work or product based on this material.
22 *
23 * ===========================================================================
24 *
25 */
26 
27 #include <align/extern.h>
28 
29 #include <klib/rc.h>
30 #include <klib/text.h>
31 #include <klib/vector.h>
32 
33 #include <vdb/manager.h>
34 #include <vdb/database.h>
35 #include <vdb/table.h>
36 #include <vdb/cursor.h>
37 
38 #include <insdc/sra.h>
39 
40 #define DFLT_CACHE_SIZE 1024 * 1024 * 32
41 
42 typedef struct PileupEstimator
43 {
44     const VCursor * ref_cursor;
45     const VCursor * align_cursor;
46 
47 	uint32_t * coverage;
48 	size_t coverage_len;
49 
50     Vector reftable;
51 
52 	uint64_t cutoff_value;
53 
54     uint32_t ref_cursor_idx_SEQ_ID;
55     uint32_t ref_cursor_idx_SEQ_LEN;
56     uint32_t ref_cursor_idx_MAX_SEQ_LEN;
57     uint32_t ref_cursor_idx_PRIMARY_ALIGNMENT_IDS;
58 
59     uint32_t align_cursor_idx_REF_POS;
60     uint32_t align_cursor_idx_REF_LEN;
61     uint32_t align_cursor_idx_READ_FILTER;
62 
63     uint32_t max_seq_len;
64     bool use_read_filter;
65 } PileupEstimator;
66 
67 
68 typedef struct RefEntry
69 {
70     const String *rname;        /* the name of the reference */
71     int64_t start_row_id;       /* the start-row-id of this reference in the REFERENCE-table */
72     uint64_t count;             /* how many rows for this reference in the REFERENCE-table */
73     uint64_t reflen;            /* how many bases does this reference have */
74 } RefEntry;
75 
76 
RefEntry_release(RefEntry * ref)77 static void RefEntry_release( RefEntry * ref )
78 {
79     if ( ref != NULL )
80     {
81         StringWhack( ref->rname );
82         free( ( void * )ref );
83     }
84 }
85 
make_ref_entry(RefEntry ** self,const String * rname,int64_t start_row_id,uint32_t len)86 static rc_t make_ref_entry( RefEntry **self, const String * rname, int64_t start_row_id, uint32_t len )
87 {
88     rc_t rc = 0;
89     RefEntry * o = calloc( 1, sizeof *o );
90     if ( o == NULL )
91         rc = RC( rcAlign, rcQuery, rcConstructing, rcMemory, rcExhausted );
92     else
93     {
94         o->start_row_id = start_row_id;
95         o->reflen = len;
96         rc = StringCopy( &o->rname, rname );
97         if ( rc == 0 )
98             *self = o;
99         else
100             RefEntry_release( o );
101     }
102     return rc;
103 }
104 
RefEntry_releaes_callback(void * item,void * data)105 static void CC RefEntry_releaes_callback( void * item, void * data )
106 {
107     RefEntry_release( item );
108 }
109 
ReleasePileupEstimator(struct PileupEstimator * self)110 LIB_EXPORT rc_t CC ReleasePileupEstimator( struct PileupEstimator *self )
111 {
112     rc_t rc = 0;
113     if ( self != NULL )
114     {
115 		if ( self->coverage != NULL )
116 			free( ( void * ) self->coverage );
117         if ( self->ref_cursor != NULL )
118             rc = VCursorRelease( self->ref_cursor );
119         if ( rc == 0 && self->align_cursor != NULL )
120             rc = VCursorRelease( self->align_cursor );
121         VectorWhack( &self->reftable, RefEntry_releaes_callback, NULL );
122         if ( rc == 0 )
123             free( ( void * )self );
124     }
125     return rc;
126 }
127 
128 
129 /* ===================================================================================================== */
130 
131 
AddRefCursor(PileupEstimator * self,const VCursor * ref_cursor)132 static rc_t AddRefCursor( PileupEstimator *self, const VCursor * ref_cursor )
133 {
134     rc_t rc = VCursorAddRef( ref_cursor );
135     if ( rc == 0 )
136         self->ref_cursor = ref_cursor;
137     if ( rc == 0 )
138         rc = VCursorGetColumnIdx( ref_cursor, &self->ref_cursor_idx_SEQ_ID, "SEQ_ID" );
139     if ( rc == 0 )
140         rc = VCursorGetColumnIdx( ref_cursor, &self->ref_cursor_idx_SEQ_LEN, "SEQ_LEN" );
141     if ( rc == 0 )
142         rc = VCursorGetColumnIdx( ref_cursor, &self->ref_cursor_idx_MAX_SEQ_LEN, "MAX_SEQ_LEN" );
143     if ( rc == 0 )
144         rc = VCursorGetColumnIdx( ref_cursor, &self->ref_cursor_idx_PRIMARY_ALIGNMENT_IDS, "PRIMARY_ALIGNMENT_IDS" );
145     return rc;
146 }
147 
MakeRefCursor(PileupEstimator * self,const VDatabase * db,size_t cursor_cache_size)148 static rc_t MakeRefCursor( PileupEstimator *self, const VDatabase * db, size_t cursor_cache_size )
149 {
150     const VTable * tbl;
151     rc_t rc = VDatabaseOpenTableRead( db, &tbl, "%s", "REFERENCE" );
152     if ( rc == 0 )
153     {
154         rc = VTableCreateCachedCursorRead( tbl, &self->ref_cursor, cursor_cache_size );
155         if ( rc == 0 )
156             rc = VCursorAddColumn( self->ref_cursor, &self->ref_cursor_idx_SEQ_ID, "SEQ_ID" );
157         if ( rc == 0 )
158             rc = VCursorAddColumn( self->ref_cursor, &self->ref_cursor_idx_SEQ_LEN, "SEQ_LEN" );
159         if ( rc == 0 )
160             rc = VCursorAddColumn( self->ref_cursor, &self->ref_cursor_idx_MAX_SEQ_LEN, "MAX_SEQ_LEN" );
161         if ( rc == 0 )
162             rc = VCursorAddColumn( self->ref_cursor, &self->ref_cursor_idx_PRIMARY_ALIGNMENT_IDS, "PRIMARY_ALIGNMENT_IDS" );
163         if ( rc == 0 )
164             rc = VCursorOpen( self->ref_cursor );
165         VTableRelease( tbl );
166     }
167     return rc;
168 }
169 
AddAlignCursor(PileupEstimator * self,const VCursor * align_cursor)170 static rc_t AddAlignCursor( PileupEstimator *self, const VCursor * align_cursor )
171 {
172     rc_t rc = VCursorAddRef( align_cursor );
173     if ( rc == 0 )
174         self->align_cursor = align_cursor;
175     if ( rc == 0 )
176         rc = VCursorGetColumnIdx( align_cursor, &self->align_cursor_idx_REF_POS, "REF_POS" );
177     if ( rc == 0 )
178         rc = VCursorGetColumnIdx( align_cursor, &self->align_cursor_idx_REF_LEN, "REF_LEN" );
179     if ( rc == 0 && self->use_read_filter )
180         rc = VCursorGetColumnIdx( align_cursor, &self->align_cursor_idx_READ_FILTER, "READ_FILTER" );
181     return rc;
182 }
183 
MakeAlignCursor(PileupEstimator * self,const VDatabase * db,size_t cursor_cache_size)184 static rc_t MakeAlignCursor( PileupEstimator *self, const VDatabase * db, size_t cursor_cache_size )
185 {
186     const VTable * tbl;
187     rc_t rc = VDatabaseOpenTableRead( db, &tbl, "%s", "PRIMARY_ALIGNMENT" );
188     if ( rc == 0 )
189     {
190         rc = VTableCreateCachedCursorRead( tbl, &self->align_cursor, cursor_cache_size );
191         if ( rc == 0 )
192             rc = VCursorAddColumn( self->align_cursor, &self->align_cursor_idx_REF_POS, "REF_POS" );
193         if ( rc == 0 )
194             rc = VCursorAddColumn( self->align_cursor, &self->align_cursor_idx_REF_LEN, "REF_LEN" );
195         if ( rc == 0 && self->use_read_filter )
196             rc = VCursorAddColumn( self->align_cursor, &self->align_cursor_idx_READ_FILTER, "READ_FILTER" );
197         if ( rc == 0 )
198             rc = VCursorOpen( self->align_cursor );
199         VTableRelease( tbl );
200     }
201     return rc;
202 }
203 
InitializePileupEstimator(PileupEstimator * self,const char * source,size_t cursor_cache_size,const VCursor * ref_cursor,const VCursor * align_cursor,uint64_t cutoff_value,bool use_read_filter)204 static rc_t InitializePileupEstimator( PileupEstimator *self,
205         const char * source,
206         size_t cursor_cache_size,
207         const VCursor * ref_cursor,
208         const VCursor * align_cursor,
209 		uint64_t cutoff_value,
210         bool use_read_filter )
211 {
212     rc_t rc = 0;
213     VectorInit( &self->reftable, 0, 25 );
214 	self->cutoff_value = cutoff_value;
215     self->use_read_filter = use_read_filter;
216     if ( ref_cursor == NULL || align_cursor == NULL )
217     {
218         if ( source == NULL )
219             rc = RC( rcAlign, rcQuery, rcAccessing, rcParam, rcNull );
220         else
221         {
222             const VDBManager * mgr;
223             rc = VDBManagerMakeRead( &mgr, NULL );
224             if ( rc == 0 )
225             {
226                 const VDatabase * db;
227                 rc = VDBManagerOpenDBRead( mgr, &db, NULL, "%s", source );
228                 if ( rc == 0 )
229                 {
230                     if ( cursor_cache_size == 0 )
231                         cursor_cache_size = DFLT_CACHE_SIZE;
232 
233                     if ( ref_cursor != NULL )
234                         rc = AddRefCursor( self, ref_cursor );
235                     else
236                         rc = MakeRefCursor( self, db, cursor_cache_size );
237 
238                     if ( rc == 0 )
239                     {
240                         if ( align_cursor != NULL )
241                             rc = AddAlignCursor( self, align_cursor );
242                         else
243                             rc = MakeAlignCursor( self, db, cursor_cache_size );
244                     }
245                     VDatabaseRelease( db );
246                 }
247                 VDBManagerRelease( mgr );
248             }
249         }
250     }
251     else
252     {
253         rc = AddRefCursor( self, ref_cursor );
254         if ( rc == 0 )
255             rc = AddAlignCursor( self, align_cursor );
256     }
257     return rc;
258 }
259 
260 
MakePileupEstimator(struct PileupEstimator ** self,const char * source,size_t cursor_cache_size,const struct VCursor * ref_cursor,const struct VCursor * align_cursor,uint64_t cutoff_value,bool use_read_filter)261 LIB_EXPORT rc_t CC MakePileupEstimator( struct PileupEstimator **self,
262         const char * source,
263         size_t cursor_cache_size,
264         const struct VCursor * ref_cursor,
265         const struct VCursor * align_cursor,
266 		uint64_t cutoff_value,
267         bool use_read_filter )
268 {
269     rc_t rc = 0;
270     if ( self == NULL )
271         rc = RC( rcAlign, rcQuery, rcConstructing, rcSelf, rcNull );
272     else
273     {
274         PileupEstimator * o = calloc( 1, sizeof *o );
275         *self = NULL;
276         if ( o == NULL )
277             rc = RC( rcAlign, rcQuery, rcConstructing, rcMemory, rcExhausted );
278         else
279         {
280             rc = InitializePileupEstimator( o, source, cursor_cache_size,
281 											ref_cursor, align_cursor,
282                                             cutoff_value, use_read_filter );
283             if ( rc == 0 )
284                 *self = o;
285             else
286                 ReleasePileupEstimator( o );
287         }
288     }
289     return rc;
290 }
291 
292 /* ===================================================================================================== */
293 
ScanRefTable(PileupEstimator * self)294 static rc_t ScanRefTable( PileupEstimator *self )
295 {
296     int64_t row;
297     uint64_t row_idx, count;
298     RefEntry * ref_entry = NULL;
299     String rname;
300     uint32_t * ptr;
301     uint32_t elem_bits, boff, row_len;
302 
303     rc_t rc = VCursorIdRange( self->ref_cursor, self->ref_cursor_idx_SEQ_ID, &row, &count );
304     for( row_idx = 0; rc == 0 && row_idx < count; ++row, ++row_idx )
305     {
306         /* get max_seq_len if we do not have it yet */
307         if ( self->max_seq_len == 0 )
308         {
309             rc = VCursorCellDataDirect( self->ref_cursor, row, self->ref_cursor_idx_MAX_SEQ_LEN, &elem_bits,
310                                         ( const void ** )&ptr, &boff, &row_len );
311             if ( rc == 0 && row_len < 1 )
312                 rc = RC( rcAlign, rcQuery, rcAccessing, rcParam, rcInvalid );
313             if ( rc == 0 && ptr[ 0 ] == 0 )
314                 rc = RC( rcAlign, rcQuery, rcAccessing, rcParam, rcInvalid );
315             if ( rc == 0 )
316                 self->max_seq_len = ptr[ 0 ];
317         }
318 
319         /* get the REFERENCE-NAME */
320         if ( rc == 0 )
321         {
322             rc = VCursorCellDataDirect( self->ref_cursor, row, self->ref_cursor_idx_SEQ_ID, &elem_bits,
323                                         ( const void ** )&rname.addr, &boff, &rname.len );
324             if ( rc == 0 && rname.len < 1 )
325                 rc = RC( rcAlign, rcQuery, rcAccessing, rcParam, rcInvalid );
326             if ( rc == 0 )
327                 rname.size = rname.len;
328         }
329 
330         /* get the SEQ_LEN */
331         if ( rc == 0 )
332         {
333             rc = VCursorCellDataDirect( self->ref_cursor, row, self->ref_cursor_idx_SEQ_LEN, &elem_bits,
334                                         ( const void ** )&ptr, &boff, &row_len );
335             if ( rc == 0 && row_len < 1 )
336                 rc = RC( rcAlign, rcQuery, rcAccessing, rcParam, rcInvalid );
337         }
338 
339         /* do the counting/inserting ... */
340         if ( rc == 0 )
341         {
342             if ( ref_entry == NULL )
343                 rc = make_ref_entry( &ref_entry, &rname, row, ptr[ 0 ] );
344             else
345             {
346                 int cmp = StringCompare( ref_entry->rname, &rname );
347                 if ( cmp == 0 )
348                 {
349                     ref_entry->count += 1;
350                     ref_entry->reflen += ptr[ 0 ];
351                 }
352                 else
353                 {
354                     rc = VectorAppend( &self->reftable, NULL, ref_entry );
355                     if ( rc == 0 )
356                         rc = make_ref_entry( &ref_entry, &rname, row, ptr[ 0 ] );
357                 }
358             }
359         }
360     }
361     /* insert the last entry */
362     if ( rc == 0 && ref_entry != NULL )
363     {
364         rc = VectorAppend( &self->reftable, NULL, ref_entry );
365     }
366     return rc;
367 }
368 
FindRefEntry(PileupEstimator * self,RefEntry ** entry,const String * rname)369 static rc_t FindRefEntry( PileupEstimator *self, RefEntry ** entry, const String * rname )
370 {
371     rc_t rc = 0;
372     uint32_t idx, count = VectorLength( &self->reftable );
373     *entry = NULL;
374     for( idx = 0; ( rc == 0 ) && ( idx < count ) && ( *entry == NULL ); ++idx )
375     {
376         RefEntry * e = VectorGet( &self->reftable, idx );
377         if ( e == NULL )
378             rc = RC( rcAlign, rcQuery, rcAccessing, rcItem, rcInvalid );
379         else if ( 0 == StringCompare( e->rname, rname ) )
380             *entry = e;
381     }
382     if ( *entry == NULL )
383         rc = RC( rcAlign, rcQuery, rcAccessing, rcItem, rcNotFound );
384     return rc;
385 }
386 
387 
PerformEstimation(PileupEstimator * self,RefEntry * entry,uint64_t slice_start,uint32_t slice_len,uint64_t slice_end,uint64_t * result)388 static rc_t PerformEstimation( PileupEstimator *self,
389                                RefEntry * entry,
390                                uint64_t slice_start,
391                                uint32_t slice_len,
392                                uint64_t slice_end,
393                                uint64_t * result )
394 {
395     rc_t rc = 0;
396 
397 	if ( self->coverage == NULL )
398 	{
399 		self->coverage = calloc( slice_len, sizeof *( self->coverage ) );
400 		self->coverage_len = slice_len;
401 	}
402 	else if ( self->coverage_len < slice_len )
403 	{
404 		free( ( void * ) self->coverage );
405 		self->coverage = calloc( slice_len, sizeof *( self->coverage ) );
406 		self->coverage_len = slice_len;
407 	}
408 	else
409 	{
410 		memset( self->coverage, 0, self->coverage_len );
411 	}
412 
413     if ( self->coverage == NULL )
414         rc = RC( rcAlign, rcQuery, rcConstructing, rcMemory, rcExhausted );
415     else
416     {
417 		uint64_t cutoff_sum = 0;
418         int64_t ref_row_id = entry->start_row_id;
419         uint32_t ref_row_count, prim_id_count, prim_id_idx, ref_pos_count, ref_len_count, read_filter_len, elem_bits, boff;
420         int64_t * prim_ids;
421         uint32_t * ref_pos_ptr;
422         uint32_t * ref_len_ptr;
423         uint8_t * read_filter_ptr;
424         bool done = false;
425 		bool filtered_out;
426 
427         /* adjust start_ref_row and ref_row_count to the given slice... */
428         {
429             int64_t start_offset = slice_start / self->max_seq_len;
430             int64_t end_offset = slice_end / self->max_seq_len;
431             if ( start_offset > 0 ) start_offset--;
432             ref_row_id += start_offset;
433             ref_row_count = ( end_offset - start_offset ) + 1;
434         }
435 
436         /* for each row in REFERENCE that matches the given slice... */
437         for( ; rc == 0 && !done && ref_row_count > 0; ref_row_id++, ref_row_count-- )
438         {
439             /* get the primary alignment-id's */
440             rc = VCursorCellDataDirect( self->ref_cursor, ref_row_id, self->ref_cursor_idx_PRIMARY_ALIGNMENT_IDS,
441                                         &elem_bits, ( const void ** )&prim_ids, &boff, &prim_id_count );
442 
443 			/* doing the cutoff-check */
444 			if ( self->cutoff_value > 0 )
445 			{
446 				cutoff_sum += prim_id_count;
447 				if ( cutoff_sum >= self->cutoff_value )
448 				{
449 					*result = UINTMAX_MAX;
450 					done = true;
451 				}
452 			}
453 
454             /* for each alignment-id found */
455             for ( prim_id_idx = 0; rc == 0 && !done && prim_id_idx < prim_id_count; prim_id_idx++ )
456             {
457                 /* get REF_POS */
458                 int64_t prim_id = prim_ids[ prim_id_idx ];
459                 filtered_out = false;
460                 rc = VCursorCellDataDirect( self->align_cursor, prim_id, self->align_cursor_idx_REF_POS,
461                             &elem_bits, ( const void ** )&ref_pos_ptr, &boff, &ref_pos_count );
462                 if ( ref_pos_count != 1 )
463                     rc = RC( rcAlign, rcQuery, rcAccessing, rcItem, rcInvalid );
464 
465                 /* get REF_LEN */
466                 if ( rc == 0 )
467                 {
468                     rc = VCursorCellDataDirect( self->align_cursor, prim_id, self->align_cursor_idx_REF_LEN,
469                                 &elem_bits, ( const void ** )&ref_len_ptr, &boff, &ref_len_count );
470                     if ( ref_len_count != 1 )
471                         rc = RC( rcAlign, rcQuery, rcAccessing, rcItem, rcInvalid );
472                 }
473 
474                 /* get READ_FILTER */
475                 if ( rc == 0 && self->use_read_filter )
476                 {
477                     rc = VCursorCellDataDirect( self->align_cursor, prim_id, self->align_cursor_idx_READ_FILTER,
478                                 &elem_bits, ( const void ** )&read_filter_ptr, &boff, &read_filter_len );
479                     if ( read_filter_len > 0 )
480                         filtered_out = ( ( read_filter_ptr[ 0 ] == SRA_READ_FILTER_REJECT )||
481                                          ( read_filter_ptr[ 0 ] == SRA_READ_FILTER_CRITERIA ) );
482                 }
483 
484                 /* enter the coverage */
485                 if ( rc == 0 && !filtered_out )
486                 {
487                     uint32_t ref_pos = ref_pos_ptr[ 0 ];
488                     uint32_t ref_len = ref_len_ptr[ 0 ];
489                     if ( ( ( ref_pos + ref_len - 1 ) >= slice_start ) && ( ref_pos <= slice_end ) )
490                     {
491                         int32_t rel_start = ( ref_pos - ( uint32_t )slice_start );
492                         int32_t i, j;
493 
494                         if ( rel_start < 0 )
495                         {
496                             ref_len += rel_start;
497                             rel_start = 0;
498                         }
499                         for ( i = rel_start, j = 0; j < ref_len && i < slice_len; ++i, ++j )
500                             self->coverage[ i ]++; /* <==== */
501                     }
502                 }
503             }
504         }
505 
506         /* sum up the bases in the slice */
507         if ( rc == 0 && !done )
508         {
509             uint32_t idx;
510             uint64_t sum = 0;
511             for ( idx = 0; idx < slice_len; ++idx ) sum += self->coverage[ idx ];
512             *result = sum;
513         }
514     }
515     return rc;
516 }
517 
RunPileupEstimator(struct PileupEstimator * self,const String * refname,uint64_t slice_start,uint32_t slice_len,uint64_t * result)518 LIB_EXPORT rc_t CC RunPileupEstimator( struct PileupEstimator *self,
519         const String * refname,
520         uint64_t slice_start,
521         uint32_t slice_len,
522         uint64_t * result )
523 {
524     rc_t rc = 0;
525     if ( self == NULL )
526         rc = RC( rcAlign, rcQuery, rcAccessing, rcSelf, rcNull );
527     else if ( refname == NULL || result == NULL )
528         rc = RC( rcAlign, rcQuery, rcAccessing, rcParam, rcNull );
529     else if ( slice_len == 0 )
530         rc = RC( rcAlign, rcQuery, rcAccessing, rcParam, rcInvalid );
531     else
532     {
533         *result = 0;
534         /* we are using max_seq_len as a flag to determine if we have to scan the reference-table */
535         if ( self->max_seq_len == 0 )
536             rc = ScanRefTable( self );
537         if ( rc == 0 )
538         {
539             RefEntry * ref_entry;
540             rc = FindRefEntry( self, &ref_entry, refname );
541             if ( rc == 0 )
542             {
543                 uint64_t slice_end = slice_start + slice_len - 1;
544                 if ( slice_start >= ref_entry->reflen || slice_end >= ref_entry->reflen )
545                     rc = RC( rcAlign, rcQuery, rcAccessing, rcItem, rcInvalid );
546                 else
547                     rc = PerformEstimation( self, ref_entry, slice_start, slice_len, slice_end, result );
548             }
549         }
550     }
551     return rc;
552 }
553 
554 
PerformCoverage(PileupEstimator * self,RefEntry * entry,uint64_t slice_start,uint32_t slice_len,uint64_t slice_end,uint32_t * coverage)555 static rc_t PerformCoverage( PileupEstimator *self,
556                              RefEntry * entry,
557                              uint64_t slice_start,
558                              uint32_t slice_len,
559                              uint64_t slice_end,
560                              uint32_t * coverage )
561 {
562     rc_t rc = 0;
563 
564     int64_t ref_row_id = entry->start_row_id;
565     uint32_t ref_row_count, prim_id_count, prim_id_idx, ref_pos_count, ref_len_count, read_filter_len, elem_bits, boff;
566     int64_t * prim_ids;
567     uint32_t * ref_pos_ptr;
568     uint32_t * ref_len_ptr;
569     uint8_t * read_filter_ptr;
570     bool filtered_out;
571 
572     /* adjust start_ref_row and ref_row_count to the given slice... */
573     {
574         int64_t start_offset = slice_start / self->max_seq_len;
575         int64_t end_offset = slice_end / self->max_seq_len;
576         if ( start_offset > 0 ) start_offset--;
577         ref_row_id += start_offset;
578         ref_row_count = ( end_offset - start_offset ) + 1;
579     }
580 
581     /* for each row in REFERENCE that matches the given slice... */
582     for( ; rc == 0 &&  ref_row_count > 0; ref_row_id++, ref_row_count-- )
583     {
584         /* get the primary alignment-id's */
585         rc = VCursorCellDataDirect( self->ref_cursor, ref_row_id, self->ref_cursor_idx_PRIMARY_ALIGNMENT_IDS,
586                                     &elem_bits, ( const void ** )&prim_ids, &boff, &prim_id_count );
587 
588         /* for each alignment-id found */
589         for ( prim_id_idx = 0; rc == 0 && prim_id_idx < prim_id_count; prim_id_idx++ )
590         {
591             /* get REF_POS */
592             int64_t prim_id = prim_ids[ prim_id_idx ];
593             filtered_out = false;
594             rc = VCursorCellDataDirect( self->align_cursor, prim_id, self->align_cursor_idx_REF_POS,
595                         &elem_bits, ( const void ** )&ref_pos_ptr, &boff, &ref_pos_count );
596             if ( ref_pos_count != 1 )
597                 rc = RC( rcAlign, rcQuery, rcAccessing, rcItem, rcInvalid );
598 
599             /* get REF_LEN */
600             if ( rc == 0 )
601             {
602                 rc = VCursorCellDataDirect( self->align_cursor, prim_id, self->align_cursor_idx_REF_LEN,
603                             &elem_bits, ( const void ** )&ref_len_ptr, &boff, &ref_len_count );
604                 if ( ref_len_count != 1 )
605                     rc = RC( rcAlign, rcQuery, rcAccessing, rcItem, rcInvalid );
606             }
607 
608             /* get READ_FILTER */
609             if ( rc == 0 && self->use_read_filter )
610             {
611                 rc = VCursorCellDataDirect( self->align_cursor, prim_id, self->align_cursor_idx_READ_FILTER,
612                             &elem_bits, ( const void ** )&read_filter_ptr, &boff, &read_filter_len );
613                 if ( read_filter_len > 0 )
614                     filtered_out = ( ( read_filter_ptr[ 0 ] == SRA_READ_FILTER_REJECT )||
615                                      ( read_filter_ptr[ 0 ] == SRA_READ_FILTER_CRITERIA ) );
616             }
617 
618             /* enter the coverage */
619             if ( rc == 0 && !filtered_out )
620             {
621                 uint64_t ref_pos = ref_pos_ptr[ 0 ];
622                 uint64_t ref_len = ref_len_ptr[ 0 ];
623                 if ( ( ( ref_pos + ref_len - 1 ) >= slice_start ) && ( ref_pos <= slice_end ) )
624                 {
625                     int64_t j;
626                     int64_t rel_pos = ref_pos;
627                     rel_pos -= slice_start;
628 
629                     if ( rel_pos < 0 )
630                     {
631                         ref_len += rel_pos;
632                         rel_pos = 0;
633                     }
634 
635                     for ( j = 0; j < ref_len && rel_pos < slice_len; ++rel_pos, ++j )
636                         coverage[ rel_pos ]++; /* <==== */
637                 }
638             }
639         }
640     }
641     return rc;
642 }
643 
644 
RunCoverage(struct PileupEstimator * self,const String * refname,uint64_t slice_start,uint32_t slice_len,uint32_t * coverage)645 LIB_EXPORT rc_t CC RunCoverage( struct PileupEstimator *self,
646                                 const String * refname,
647                                 uint64_t slice_start,
648                                 uint32_t slice_len,
649                                 uint32_t * coverage )
650 {
651     rc_t rc = 0;
652     if ( self == NULL )
653         rc = RC( rcAlign, rcQuery, rcAccessing, rcSelf, rcNull );
654     else if ( refname == NULL || coverage == NULL )
655         rc = RC( rcAlign, rcQuery, rcAccessing, rcParam, rcNull );
656     else if ( slice_len == 0 )
657         rc = RC( rcAlign, rcQuery, rcAccessing, rcParam, rcInvalid );
658     else
659     {
660         /* we are using max_seq_len as a flag to determine if we have to scan the reference-table */
661         if ( self->max_seq_len == 0 )
662             rc = ScanRefTable( self );
663         if ( rc == 0 )
664         {
665             RefEntry * ref_entry;
666             rc = FindRefEntry( self, &ref_entry, refname );
667             if ( rc == 0 )
668             {
669                 uint64_t slice_end = slice_start + slice_len - 1;
670                 if ( slice_start >= ref_entry->reflen || slice_end >= ref_entry->reflen )
671                     rc = RC( rcAlign, rcQuery, rcAccessing, rcItem, rcInvalid );
672                 else
673                 {
674                     memset( coverage, 0, slice_len * ( sizeof *coverage ) );
675                     rc = PerformCoverage( self, ref_entry, slice_start, slice_len, slice_end, coverage );
676                 }
677             }
678         }
679 
680     }
681     return rc;
682 }
683 
684 
EstimatorRefCount(struct PileupEstimator * self,uint32_t * count)685 LIB_EXPORT rc_t CC EstimatorRefCount( struct PileupEstimator *self, uint32_t * count )
686 {
687     rc_t rc = 0;
688     if ( self == NULL )
689         rc = RC( rcAlign, rcQuery, rcAccessing, rcSelf, rcNull );
690     else if ( count == NULL )
691         rc = RC( rcAlign, rcQuery, rcAccessing, rcParam, rcNull );
692     else
693     {
694         /* we are using max_seq_len as a flag to determine if we have to scan the reference-table */
695         if ( self->max_seq_len == 0 )
696             rc = ScanRefTable( self );
697         if ( rc == 0 )
698             *count = VectorLength( &self->reftable );
699     }
700     return rc;
701 }
702 
703 
EstimatorRefInfo(struct PileupEstimator * self,uint32_t idx,String * refname,uint64_t * reflen)704 LIB_EXPORT rc_t CC EstimatorRefInfo( struct PileupEstimator *self,
705                                      uint32_t idx,
706                                      String * refname,
707                                      uint64_t * reflen )
708 {
709     rc_t rc = 0;
710     if ( self == NULL )
711         rc = RC( rcAlign, rcQuery, rcAccessing, rcSelf, rcNull );
712     else if ( refname == NULL || reflen == NULL )
713         rc = RC( rcAlign, rcQuery, rcAccessing, rcParam, rcNull );
714     else
715     {
716         if ( self->max_seq_len == 0 )
717             rc = ScanRefTable( self );
718         if ( rc == 0 )
719         {
720             RefEntry * e = VectorGet( &self->reftable, idx );
721             if ( e == NULL )
722                 rc = RC( rcAlign, rcQuery, rcAccessing, rcItem, rcInvalid );
723             else
724             {
725                 StringInit( refname, e->rname->addr, e->rname->size, e->rname->len );
726                 *reflen = e->reflen;
727             }
728         }
729     }
730     return rc;
731 }
732