1 /*===========================================================================
2 *
3 * PUBLIC DOMAIN NOTICE
4 * National Center for Biotechnology Information
5 *
6 * This software/database is a "United States Government Work" under the
7 * terms of the United States Copyright Act. It was written as part of
8 * the author's official duties as a United States Government employee and
9 * thus cannot be copyrighted. This software/database is freely available
10 * to the public for use. The National Library of Medicine and the U.S.
11 * Government have not placed any restriction on its use or reproduction.
12 *
13 * Although all reasonable efforts have been taken to ensure the accuracy
14 * and reliability of the software and data, the NLM and the U.S.
15 * Government do not and cannot warrant the performance or results that
16 * may be obtained by using this software or data. The NLM and the U.S.
17 * Government disclaim all warranties, express or implied, including
18 * warranties of performance, merchantability or fitness for any particular
19 * purpose.
20 *
21 * Please cite the author in any work or product based on this material.
22 *
23 * ===========================================================================
24 *
25 */
26
27 #include <align/extern.h>
28
29 #include <klib/rc.h>
30 #include <klib/text.h>
31 #include <klib/vector.h>
32
33 #include <vdb/manager.h>
34 #include <vdb/database.h>
35 #include <vdb/table.h>
36 #include <vdb/cursor.h>
37
38 #include <insdc/sra.h>
39
40 #define DFLT_CACHE_SIZE 1024 * 1024 * 32
41
42 typedef struct PileupEstimator
43 {
44 const VCursor * ref_cursor;
45 const VCursor * align_cursor;
46
47 uint32_t * coverage;
48 size_t coverage_len;
49
50 Vector reftable;
51
52 uint64_t cutoff_value;
53
54 uint32_t ref_cursor_idx_SEQ_ID;
55 uint32_t ref_cursor_idx_SEQ_LEN;
56 uint32_t ref_cursor_idx_MAX_SEQ_LEN;
57 uint32_t ref_cursor_idx_PRIMARY_ALIGNMENT_IDS;
58
59 uint32_t align_cursor_idx_REF_POS;
60 uint32_t align_cursor_idx_REF_LEN;
61 uint32_t align_cursor_idx_READ_FILTER;
62
63 uint32_t max_seq_len;
64 bool use_read_filter;
65 } PileupEstimator;
66
67
68 typedef struct RefEntry
69 {
70 const String *rname; /* the name of the reference */
71 int64_t start_row_id; /* the start-row-id of this reference in the REFERENCE-table */
72 uint64_t count; /* how many rows for this reference in the REFERENCE-table */
73 uint64_t reflen; /* how many bases does this reference have */
74 } RefEntry;
75
76
RefEntry_release(RefEntry * ref)77 static void RefEntry_release( RefEntry * ref )
78 {
79 if ( ref != NULL )
80 {
81 StringWhack( ref->rname );
82 free( ( void * )ref );
83 }
84 }
85
make_ref_entry(RefEntry ** self,const String * rname,int64_t start_row_id,uint32_t len)86 static rc_t make_ref_entry( RefEntry **self, const String * rname, int64_t start_row_id, uint32_t len )
87 {
88 rc_t rc = 0;
89 RefEntry * o = calloc( 1, sizeof *o );
90 if ( o == NULL )
91 rc = RC( rcAlign, rcQuery, rcConstructing, rcMemory, rcExhausted );
92 else
93 {
94 o->start_row_id = start_row_id;
95 o->reflen = len;
96 rc = StringCopy( &o->rname, rname );
97 if ( rc == 0 )
98 *self = o;
99 else
100 RefEntry_release( o );
101 }
102 return rc;
103 }
104
RefEntry_releaes_callback(void * item,void * data)105 static void CC RefEntry_releaes_callback( void * item, void * data )
106 {
107 RefEntry_release( item );
108 }
109
ReleasePileupEstimator(struct PileupEstimator * self)110 LIB_EXPORT rc_t CC ReleasePileupEstimator( struct PileupEstimator *self )
111 {
112 rc_t rc = 0;
113 if ( self != NULL )
114 {
115 if ( self->coverage != NULL )
116 free( ( void * ) self->coverage );
117 if ( self->ref_cursor != NULL )
118 rc = VCursorRelease( self->ref_cursor );
119 if ( rc == 0 && self->align_cursor != NULL )
120 rc = VCursorRelease( self->align_cursor );
121 VectorWhack( &self->reftable, RefEntry_releaes_callback, NULL );
122 if ( rc == 0 )
123 free( ( void * )self );
124 }
125 return rc;
126 }
127
128
129 /* ===================================================================================================== */
130
131
AddRefCursor(PileupEstimator * self,const VCursor * ref_cursor)132 static rc_t AddRefCursor( PileupEstimator *self, const VCursor * ref_cursor )
133 {
134 rc_t rc = VCursorAddRef( ref_cursor );
135 if ( rc == 0 )
136 self->ref_cursor = ref_cursor;
137 if ( rc == 0 )
138 rc = VCursorGetColumnIdx( ref_cursor, &self->ref_cursor_idx_SEQ_ID, "SEQ_ID" );
139 if ( rc == 0 )
140 rc = VCursorGetColumnIdx( ref_cursor, &self->ref_cursor_idx_SEQ_LEN, "SEQ_LEN" );
141 if ( rc == 0 )
142 rc = VCursorGetColumnIdx( ref_cursor, &self->ref_cursor_idx_MAX_SEQ_LEN, "MAX_SEQ_LEN" );
143 if ( rc == 0 )
144 rc = VCursorGetColumnIdx( ref_cursor, &self->ref_cursor_idx_PRIMARY_ALIGNMENT_IDS, "PRIMARY_ALIGNMENT_IDS" );
145 return rc;
146 }
147
MakeRefCursor(PileupEstimator * self,const VDatabase * db,size_t cursor_cache_size)148 static rc_t MakeRefCursor( PileupEstimator *self, const VDatabase * db, size_t cursor_cache_size )
149 {
150 const VTable * tbl;
151 rc_t rc = VDatabaseOpenTableRead( db, &tbl, "%s", "REFERENCE" );
152 if ( rc == 0 )
153 {
154 rc = VTableCreateCachedCursorRead( tbl, &self->ref_cursor, cursor_cache_size );
155 if ( rc == 0 )
156 rc = VCursorAddColumn( self->ref_cursor, &self->ref_cursor_idx_SEQ_ID, "SEQ_ID" );
157 if ( rc == 0 )
158 rc = VCursorAddColumn( self->ref_cursor, &self->ref_cursor_idx_SEQ_LEN, "SEQ_LEN" );
159 if ( rc == 0 )
160 rc = VCursorAddColumn( self->ref_cursor, &self->ref_cursor_idx_MAX_SEQ_LEN, "MAX_SEQ_LEN" );
161 if ( rc == 0 )
162 rc = VCursorAddColumn( self->ref_cursor, &self->ref_cursor_idx_PRIMARY_ALIGNMENT_IDS, "PRIMARY_ALIGNMENT_IDS" );
163 if ( rc == 0 )
164 rc = VCursorOpen( self->ref_cursor );
165 VTableRelease( tbl );
166 }
167 return rc;
168 }
169
AddAlignCursor(PileupEstimator * self,const VCursor * align_cursor)170 static rc_t AddAlignCursor( PileupEstimator *self, const VCursor * align_cursor )
171 {
172 rc_t rc = VCursorAddRef( align_cursor );
173 if ( rc == 0 )
174 self->align_cursor = align_cursor;
175 if ( rc == 0 )
176 rc = VCursorGetColumnIdx( align_cursor, &self->align_cursor_idx_REF_POS, "REF_POS" );
177 if ( rc == 0 )
178 rc = VCursorGetColumnIdx( align_cursor, &self->align_cursor_idx_REF_LEN, "REF_LEN" );
179 if ( rc == 0 && self->use_read_filter )
180 rc = VCursorGetColumnIdx( align_cursor, &self->align_cursor_idx_READ_FILTER, "READ_FILTER" );
181 return rc;
182 }
183
MakeAlignCursor(PileupEstimator * self,const VDatabase * db,size_t cursor_cache_size)184 static rc_t MakeAlignCursor( PileupEstimator *self, const VDatabase * db, size_t cursor_cache_size )
185 {
186 const VTable * tbl;
187 rc_t rc = VDatabaseOpenTableRead( db, &tbl, "%s", "PRIMARY_ALIGNMENT" );
188 if ( rc == 0 )
189 {
190 rc = VTableCreateCachedCursorRead( tbl, &self->align_cursor, cursor_cache_size );
191 if ( rc == 0 )
192 rc = VCursorAddColumn( self->align_cursor, &self->align_cursor_idx_REF_POS, "REF_POS" );
193 if ( rc == 0 )
194 rc = VCursorAddColumn( self->align_cursor, &self->align_cursor_idx_REF_LEN, "REF_LEN" );
195 if ( rc == 0 && self->use_read_filter )
196 rc = VCursorAddColumn( self->align_cursor, &self->align_cursor_idx_READ_FILTER, "READ_FILTER" );
197 if ( rc == 0 )
198 rc = VCursorOpen( self->align_cursor );
199 VTableRelease( tbl );
200 }
201 return rc;
202 }
203
InitializePileupEstimator(PileupEstimator * self,const char * source,size_t cursor_cache_size,const VCursor * ref_cursor,const VCursor * align_cursor,uint64_t cutoff_value,bool use_read_filter)204 static rc_t InitializePileupEstimator( PileupEstimator *self,
205 const char * source,
206 size_t cursor_cache_size,
207 const VCursor * ref_cursor,
208 const VCursor * align_cursor,
209 uint64_t cutoff_value,
210 bool use_read_filter )
211 {
212 rc_t rc = 0;
213 VectorInit( &self->reftable, 0, 25 );
214 self->cutoff_value = cutoff_value;
215 self->use_read_filter = use_read_filter;
216 if ( ref_cursor == NULL || align_cursor == NULL )
217 {
218 if ( source == NULL )
219 rc = RC( rcAlign, rcQuery, rcAccessing, rcParam, rcNull );
220 else
221 {
222 const VDBManager * mgr;
223 rc = VDBManagerMakeRead( &mgr, NULL );
224 if ( rc == 0 )
225 {
226 const VDatabase * db;
227 rc = VDBManagerOpenDBRead( mgr, &db, NULL, "%s", source );
228 if ( rc == 0 )
229 {
230 if ( cursor_cache_size == 0 )
231 cursor_cache_size = DFLT_CACHE_SIZE;
232
233 if ( ref_cursor != NULL )
234 rc = AddRefCursor( self, ref_cursor );
235 else
236 rc = MakeRefCursor( self, db, cursor_cache_size );
237
238 if ( rc == 0 )
239 {
240 if ( align_cursor != NULL )
241 rc = AddAlignCursor( self, align_cursor );
242 else
243 rc = MakeAlignCursor( self, db, cursor_cache_size );
244 }
245 VDatabaseRelease( db );
246 }
247 VDBManagerRelease( mgr );
248 }
249 }
250 }
251 else
252 {
253 rc = AddRefCursor( self, ref_cursor );
254 if ( rc == 0 )
255 rc = AddAlignCursor( self, align_cursor );
256 }
257 return rc;
258 }
259
260
MakePileupEstimator(struct PileupEstimator ** self,const char * source,size_t cursor_cache_size,const struct VCursor * ref_cursor,const struct VCursor * align_cursor,uint64_t cutoff_value,bool use_read_filter)261 LIB_EXPORT rc_t CC MakePileupEstimator( struct PileupEstimator **self,
262 const char * source,
263 size_t cursor_cache_size,
264 const struct VCursor * ref_cursor,
265 const struct VCursor * align_cursor,
266 uint64_t cutoff_value,
267 bool use_read_filter )
268 {
269 rc_t rc = 0;
270 if ( self == NULL )
271 rc = RC( rcAlign, rcQuery, rcConstructing, rcSelf, rcNull );
272 else
273 {
274 PileupEstimator * o = calloc( 1, sizeof *o );
275 *self = NULL;
276 if ( o == NULL )
277 rc = RC( rcAlign, rcQuery, rcConstructing, rcMemory, rcExhausted );
278 else
279 {
280 rc = InitializePileupEstimator( o, source, cursor_cache_size,
281 ref_cursor, align_cursor,
282 cutoff_value, use_read_filter );
283 if ( rc == 0 )
284 *self = o;
285 else
286 ReleasePileupEstimator( o );
287 }
288 }
289 return rc;
290 }
291
292 /* ===================================================================================================== */
293
ScanRefTable(PileupEstimator * self)294 static rc_t ScanRefTable( PileupEstimator *self )
295 {
296 int64_t row;
297 uint64_t row_idx, count;
298 RefEntry * ref_entry = NULL;
299 String rname;
300 uint32_t * ptr;
301 uint32_t elem_bits, boff, row_len;
302
303 rc_t rc = VCursorIdRange( self->ref_cursor, self->ref_cursor_idx_SEQ_ID, &row, &count );
304 for( row_idx = 0; rc == 0 && row_idx < count; ++row, ++row_idx )
305 {
306 /* get max_seq_len if we do not have it yet */
307 if ( self->max_seq_len == 0 )
308 {
309 rc = VCursorCellDataDirect( self->ref_cursor, row, self->ref_cursor_idx_MAX_SEQ_LEN, &elem_bits,
310 ( const void ** )&ptr, &boff, &row_len );
311 if ( rc == 0 && row_len < 1 )
312 rc = RC( rcAlign, rcQuery, rcAccessing, rcParam, rcInvalid );
313 if ( rc == 0 && ptr[ 0 ] == 0 )
314 rc = RC( rcAlign, rcQuery, rcAccessing, rcParam, rcInvalid );
315 if ( rc == 0 )
316 self->max_seq_len = ptr[ 0 ];
317 }
318
319 /* get the REFERENCE-NAME */
320 if ( rc == 0 )
321 {
322 rc = VCursorCellDataDirect( self->ref_cursor, row, self->ref_cursor_idx_SEQ_ID, &elem_bits,
323 ( const void ** )&rname.addr, &boff, &rname.len );
324 if ( rc == 0 && rname.len < 1 )
325 rc = RC( rcAlign, rcQuery, rcAccessing, rcParam, rcInvalid );
326 if ( rc == 0 )
327 rname.size = rname.len;
328 }
329
330 /* get the SEQ_LEN */
331 if ( rc == 0 )
332 {
333 rc = VCursorCellDataDirect( self->ref_cursor, row, self->ref_cursor_idx_SEQ_LEN, &elem_bits,
334 ( const void ** )&ptr, &boff, &row_len );
335 if ( rc == 0 && row_len < 1 )
336 rc = RC( rcAlign, rcQuery, rcAccessing, rcParam, rcInvalid );
337 }
338
339 /* do the counting/inserting ... */
340 if ( rc == 0 )
341 {
342 if ( ref_entry == NULL )
343 rc = make_ref_entry( &ref_entry, &rname, row, ptr[ 0 ] );
344 else
345 {
346 int cmp = StringCompare( ref_entry->rname, &rname );
347 if ( cmp == 0 )
348 {
349 ref_entry->count += 1;
350 ref_entry->reflen += ptr[ 0 ];
351 }
352 else
353 {
354 rc = VectorAppend( &self->reftable, NULL, ref_entry );
355 if ( rc == 0 )
356 rc = make_ref_entry( &ref_entry, &rname, row, ptr[ 0 ] );
357 }
358 }
359 }
360 }
361 /* insert the last entry */
362 if ( rc == 0 && ref_entry != NULL )
363 {
364 rc = VectorAppend( &self->reftable, NULL, ref_entry );
365 }
366 return rc;
367 }
368
FindRefEntry(PileupEstimator * self,RefEntry ** entry,const String * rname)369 static rc_t FindRefEntry( PileupEstimator *self, RefEntry ** entry, const String * rname )
370 {
371 rc_t rc = 0;
372 uint32_t idx, count = VectorLength( &self->reftable );
373 *entry = NULL;
374 for( idx = 0; ( rc == 0 ) && ( idx < count ) && ( *entry == NULL ); ++idx )
375 {
376 RefEntry * e = VectorGet( &self->reftable, idx );
377 if ( e == NULL )
378 rc = RC( rcAlign, rcQuery, rcAccessing, rcItem, rcInvalid );
379 else if ( 0 == StringCompare( e->rname, rname ) )
380 *entry = e;
381 }
382 if ( *entry == NULL )
383 rc = RC( rcAlign, rcQuery, rcAccessing, rcItem, rcNotFound );
384 return rc;
385 }
386
387
PerformEstimation(PileupEstimator * self,RefEntry * entry,uint64_t slice_start,uint32_t slice_len,uint64_t slice_end,uint64_t * result)388 static rc_t PerformEstimation( PileupEstimator *self,
389 RefEntry * entry,
390 uint64_t slice_start,
391 uint32_t slice_len,
392 uint64_t slice_end,
393 uint64_t * result )
394 {
395 rc_t rc = 0;
396
397 if ( self->coverage == NULL )
398 {
399 self->coverage = calloc( slice_len, sizeof *( self->coverage ) );
400 self->coverage_len = slice_len;
401 }
402 else if ( self->coverage_len < slice_len )
403 {
404 free( ( void * ) self->coverage );
405 self->coverage = calloc( slice_len, sizeof *( self->coverage ) );
406 self->coverage_len = slice_len;
407 }
408 else
409 {
410 memset( self->coverage, 0, self->coverage_len );
411 }
412
413 if ( self->coverage == NULL )
414 rc = RC( rcAlign, rcQuery, rcConstructing, rcMemory, rcExhausted );
415 else
416 {
417 uint64_t cutoff_sum = 0;
418 int64_t ref_row_id = entry->start_row_id;
419 uint32_t ref_row_count, prim_id_count, prim_id_idx, ref_pos_count, ref_len_count, read_filter_len, elem_bits, boff;
420 int64_t * prim_ids;
421 uint32_t * ref_pos_ptr;
422 uint32_t * ref_len_ptr;
423 uint8_t * read_filter_ptr;
424 bool done = false;
425 bool filtered_out;
426
427 /* adjust start_ref_row and ref_row_count to the given slice... */
428 {
429 int64_t start_offset = slice_start / self->max_seq_len;
430 int64_t end_offset = slice_end / self->max_seq_len;
431 if ( start_offset > 0 ) start_offset--;
432 ref_row_id += start_offset;
433 ref_row_count = ( end_offset - start_offset ) + 1;
434 }
435
436 /* for each row in REFERENCE that matches the given slice... */
437 for( ; rc == 0 && !done && ref_row_count > 0; ref_row_id++, ref_row_count-- )
438 {
439 /* get the primary alignment-id's */
440 rc = VCursorCellDataDirect( self->ref_cursor, ref_row_id, self->ref_cursor_idx_PRIMARY_ALIGNMENT_IDS,
441 &elem_bits, ( const void ** )&prim_ids, &boff, &prim_id_count );
442
443 /* doing the cutoff-check */
444 if ( self->cutoff_value > 0 )
445 {
446 cutoff_sum += prim_id_count;
447 if ( cutoff_sum >= self->cutoff_value )
448 {
449 *result = UINTMAX_MAX;
450 done = true;
451 }
452 }
453
454 /* for each alignment-id found */
455 for ( prim_id_idx = 0; rc == 0 && !done && prim_id_idx < prim_id_count; prim_id_idx++ )
456 {
457 /* get REF_POS */
458 int64_t prim_id = prim_ids[ prim_id_idx ];
459 filtered_out = false;
460 rc = VCursorCellDataDirect( self->align_cursor, prim_id, self->align_cursor_idx_REF_POS,
461 &elem_bits, ( const void ** )&ref_pos_ptr, &boff, &ref_pos_count );
462 if ( ref_pos_count != 1 )
463 rc = RC( rcAlign, rcQuery, rcAccessing, rcItem, rcInvalid );
464
465 /* get REF_LEN */
466 if ( rc == 0 )
467 {
468 rc = VCursorCellDataDirect( self->align_cursor, prim_id, self->align_cursor_idx_REF_LEN,
469 &elem_bits, ( const void ** )&ref_len_ptr, &boff, &ref_len_count );
470 if ( ref_len_count != 1 )
471 rc = RC( rcAlign, rcQuery, rcAccessing, rcItem, rcInvalid );
472 }
473
474 /* get READ_FILTER */
475 if ( rc == 0 && self->use_read_filter )
476 {
477 rc = VCursorCellDataDirect( self->align_cursor, prim_id, self->align_cursor_idx_READ_FILTER,
478 &elem_bits, ( const void ** )&read_filter_ptr, &boff, &read_filter_len );
479 if ( read_filter_len > 0 )
480 filtered_out = ( ( read_filter_ptr[ 0 ] == SRA_READ_FILTER_REJECT )||
481 ( read_filter_ptr[ 0 ] == SRA_READ_FILTER_CRITERIA ) );
482 }
483
484 /* enter the coverage */
485 if ( rc == 0 && !filtered_out )
486 {
487 uint32_t ref_pos = ref_pos_ptr[ 0 ];
488 uint32_t ref_len = ref_len_ptr[ 0 ];
489 if ( ( ( ref_pos + ref_len - 1 ) >= slice_start ) && ( ref_pos <= slice_end ) )
490 {
491 int32_t rel_start = ( ref_pos - ( uint32_t )slice_start );
492 int32_t i, j;
493
494 if ( rel_start < 0 )
495 {
496 ref_len += rel_start;
497 rel_start = 0;
498 }
499 for ( i = rel_start, j = 0; j < ref_len && i < slice_len; ++i, ++j )
500 self->coverage[ i ]++; /* <==== */
501 }
502 }
503 }
504 }
505
506 /* sum up the bases in the slice */
507 if ( rc == 0 && !done )
508 {
509 uint32_t idx;
510 uint64_t sum = 0;
511 for ( idx = 0; idx < slice_len; ++idx ) sum += self->coverage[ idx ];
512 *result = sum;
513 }
514 }
515 return rc;
516 }
517
RunPileupEstimator(struct PileupEstimator * self,const String * refname,uint64_t slice_start,uint32_t slice_len,uint64_t * result)518 LIB_EXPORT rc_t CC RunPileupEstimator( struct PileupEstimator *self,
519 const String * refname,
520 uint64_t slice_start,
521 uint32_t slice_len,
522 uint64_t * result )
523 {
524 rc_t rc = 0;
525 if ( self == NULL )
526 rc = RC( rcAlign, rcQuery, rcAccessing, rcSelf, rcNull );
527 else if ( refname == NULL || result == NULL )
528 rc = RC( rcAlign, rcQuery, rcAccessing, rcParam, rcNull );
529 else if ( slice_len == 0 )
530 rc = RC( rcAlign, rcQuery, rcAccessing, rcParam, rcInvalid );
531 else
532 {
533 *result = 0;
534 /* we are using max_seq_len as a flag to determine if we have to scan the reference-table */
535 if ( self->max_seq_len == 0 )
536 rc = ScanRefTable( self );
537 if ( rc == 0 )
538 {
539 RefEntry * ref_entry;
540 rc = FindRefEntry( self, &ref_entry, refname );
541 if ( rc == 0 )
542 {
543 uint64_t slice_end = slice_start + slice_len - 1;
544 if ( slice_start >= ref_entry->reflen || slice_end >= ref_entry->reflen )
545 rc = RC( rcAlign, rcQuery, rcAccessing, rcItem, rcInvalid );
546 else
547 rc = PerformEstimation( self, ref_entry, slice_start, slice_len, slice_end, result );
548 }
549 }
550 }
551 return rc;
552 }
553
554
PerformCoverage(PileupEstimator * self,RefEntry * entry,uint64_t slice_start,uint32_t slice_len,uint64_t slice_end,uint32_t * coverage)555 static rc_t PerformCoverage( PileupEstimator *self,
556 RefEntry * entry,
557 uint64_t slice_start,
558 uint32_t slice_len,
559 uint64_t slice_end,
560 uint32_t * coverage )
561 {
562 rc_t rc = 0;
563
564 int64_t ref_row_id = entry->start_row_id;
565 uint32_t ref_row_count, prim_id_count, prim_id_idx, ref_pos_count, ref_len_count, read_filter_len, elem_bits, boff;
566 int64_t * prim_ids;
567 uint32_t * ref_pos_ptr;
568 uint32_t * ref_len_ptr;
569 uint8_t * read_filter_ptr;
570 bool filtered_out;
571
572 /* adjust start_ref_row and ref_row_count to the given slice... */
573 {
574 int64_t start_offset = slice_start / self->max_seq_len;
575 int64_t end_offset = slice_end / self->max_seq_len;
576 if ( start_offset > 0 ) start_offset--;
577 ref_row_id += start_offset;
578 ref_row_count = ( end_offset - start_offset ) + 1;
579 }
580
581 /* for each row in REFERENCE that matches the given slice... */
582 for( ; rc == 0 && ref_row_count > 0; ref_row_id++, ref_row_count-- )
583 {
584 /* get the primary alignment-id's */
585 rc = VCursorCellDataDirect( self->ref_cursor, ref_row_id, self->ref_cursor_idx_PRIMARY_ALIGNMENT_IDS,
586 &elem_bits, ( const void ** )&prim_ids, &boff, &prim_id_count );
587
588 /* for each alignment-id found */
589 for ( prim_id_idx = 0; rc == 0 && prim_id_idx < prim_id_count; prim_id_idx++ )
590 {
591 /* get REF_POS */
592 int64_t prim_id = prim_ids[ prim_id_idx ];
593 filtered_out = false;
594 rc = VCursorCellDataDirect( self->align_cursor, prim_id, self->align_cursor_idx_REF_POS,
595 &elem_bits, ( const void ** )&ref_pos_ptr, &boff, &ref_pos_count );
596 if ( ref_pos_count != 1 )
597 rc = RC( rcAlign, rcQuery, rcAccessing, rcItem, rcInvalid );
598
599 /* get REF_LEN */
600 if ( rc == 0 )
601 {
602 rc = VCursorCellDataDirect( self->align_cursor, prim_id, self->align_cursor_idx_REF_LEN,
603 &elem_bits, ( const void ** )&ref_len_ptr, &boff, &ref_len_count );
604 if ( ref_len_count != 1 )
605 rc = RC( rcAlign, rcQuery, rcAccessing, rcItem, rcInvalid );
606 }
607
608 /* get READ_FILTER */
609 if ( rc == 0 && self->use_read_filter )
610 {
611 rc = VCursorCellDataDirect( self->align_cursor, prim_id, self->align_cursor_idx_READ_FILTER,
612 &elem_bits, ( const void ** )&read_filter_ptr, &boff, &read_filter_len );
613 if ( read_filter_len > 0 )
614 filtered_out = ( ( read_filter_ptr[ 0 ] == SRA_READ_FILTER_REJECT )||
615 ( read_filter_ptr[ 0 ] == SRA_READ_FILTER_CRITERIA ) );
616 }
617
618 /* enter the coverage */
619 if ( rc == 0 && !filtered_out )
620 {
621 uint64_t ref_pos = ref_pos_ptr[ 0 ];
622 uint64_t ref_len = ref_len_ptr[ 0 ];
623 if ( ( ( ref_pos + ref_len - 1 ) >= slice_start ) && ( ref_pos <= slice_end ) )
624 {
625 int64_t j;
626 int64_t rel_pos = ref_pos;
627 rel_pos -= slice_start;
628
629 if ( rel_pos < 0 )
630 {
631 ref_len += rel_pos;
632 rel_pos = 0;
633 }
634
635 for ( j = 0; j < ref_len && rel_pos < slice_len; ++rel_pos, ++j )
636 coverage[ rel_pos ]++; /* <==== */
637 }
638 }
639 }
640 }
641 return rc;
642 }
643
644
RunCoverage(struct PileupEstimator * self,const String * refname,uint64_t slice_start,uint32_t slice_len,uint32_t * coverage)645 LIB_EXPORT rc_t CC RunCoverage( struct PileupEstimator *self,
646 const String * refname,
647 uint64_t slice_start,
648 uint32_t slice_len,
649 uint32_t * coverage )
650 {
651 rc_t rc = 0;
652 if ( self == NULL )
653 rc = RC( rcAlign, rcQuery, rcAccessing, rcSelf, rcNull );
654 else if ( refname == NULL || coverage == NULL )
655 rc = RC( rcAlign, rcQuery, rcAccessing, rcParam, rcNull );
656 else if ( slice_len == 0 )
657 rc = RC( rcAlign, rcQuery, rcAccessing, rcParam, rcInvalid );
658 else
659 {
660 /* we are using max_seq_len as a flag to determine if we have to scan the reference-table */
661 if ( self->max_seq_len == 0 )
662 rc = ScanRefTable( self );
663 if ( rc == 0 )
664 {
665 RefEntry * ref_entry;
666 rc = FindRefEntry( self, &ref_entry, refname );
667 if ( rc == 0 )
668 {
669 uint64_t slice_end = slice_start + slice_len - 1;
670 if ( slice_start >= ref_entry->reflen || slice_end >= ref_entry->reflen )
671 rc = RC( rcAlign, rcQuery, rcAccessing, rcItem, rcInvalid );
672 else
673 {
674 memset( coverage, 0, slice_len * ( sizeof *coverage ) );
675 rc = PerformCoverage( self, ref_entry, slice_start, slice_len, slice_end, coverage );
676 }
677 }
678 }
679
680 }
681 return rc;
682 }
683
684
EstimatorRefCount(struct PileupEstimator * self,uint32_t * count)685 LIB_EXPORT rc_t CC EstimatorRefCount( struct PileupEstimator *self, uint32_t * count )
686 {
687 rc_t rc = 0;
688 if ( self == NULL )
689 rc = RC( rcAlign, rcQuery, rcAccessing, rcSelf, rcNull );
690 else if ( count == NULL )
691 rc = RC( rcAlign, rcQuery, rcAccessing, rcParam, rcNull );
692 else
693 {
694 /* we are using max_seq_len as a flag to determine if we have to scan the reference-table */
695 if ( self->max_seq_len == 0 )
696 rc = ScanRefTable( self );
697 if ( rc == 0 )
698 *count = VectorLength( &self->reftable );
699 }
700 return rc;
701 }
702
703
EstimatorRefInfo(struct PileupEstimator * self,uint32_t idx,String * refname,uint64_t * reflen)704 LIB_EXPORT rc_t CC EstimatorRefInfo( struct PileupEstimator *self,
705 uint32_t idx,
706 String * refname,
707 uint64_t * reflen )
708 {
709 rc_t rc = 0;
710 if ( self == NULL )
711 rc = RC( rcAlign, rcQuery, rcAccessing, rcSelf, rcNull );
712 else if ( refname == NULL || reflen == NULL )
713 rc = RC( rcAlign, rcQuery, rcAccessing, rcParam, rcNull );
714 else
715 {
716 if ( self->max_seq_len == 0 )
717 rc = ScanRefTable( self );
718 if ( rc == 0 )
719 {
720 RefEntry * e = VectorGet( &self->reftable, idx );
721 if ( e == NULL )
722 rc = RC( rcAlign, rcQuery, rcAccessing, rcItem, rcInvalid );
723 else
724 {
725 StringInit( refname, e->rname->addr, e->rname->size, e->rname->len );
726 *reflen = e->reflen;
727 }
728 }
729 }
730 return rc;
731 }
732