1 /*===========================================================================
2 *
3 * PUBLIC DOMAIN NOTICE
4 * National Center for Biotechnology Information
5 *
6 * This software/database is a "United States Government Work" under the
7 * terms of the United States Copyright Act. It was written as part of
8 * the author's official duties as a United States Government employee and
9 * thus cannot be copyrighted. This software/database is freely available
10 * to the public for use. The National Library of Medicine and the U.S.
11 * Government have not placed any restriction on its use or reproduction.
12 *
13 * Although all reasonable efforts have been taken to ensure the accuracy
14 * and reliability of the software and data, the NLM and the U.S.
15 * Government do not and cannot warrant the performance or results that
16 * may be obtained by using this software or data. The NLM and the U.S.
17 * Government disclaim all warranties, express or implied, including
18 * warranties of performance, merchantability or fitness for any particular
19 * purpose.
20 *
21 * Please cite the author in any work or product based on this material.
22 *
23 * ===========================================================================
24 *
25 */
26
27 #include "SRA_Read.h"
28
29 #include "NGS_String.h"
30 #include "NGS_Cursor.h"
31 #include "NGS_Id.h"
32
33 #include <kfc/ctx.h>
34 #include <kfc/rsrc.h>
35 #include <kfc/except.h>
36 #include <kfc/xc.h>
37 #include <klib/text.h>
38 #include <klib/printf.h>
39 #include <klib/refcount.h>
40 #include <klib/rc.h>
41 #include <vdb/cursor.h>
42 #include <vdb/schema.h>
43 #include <vdb/vdb-priv.h>
44 #include <insdc/insdc.h>
45
46 #include <stddef.h>
47 #include <assert.h>
48
49 #include <sysalloc.h>
50
51 #ifndef min
52 # define min(a,b) ( (a) < (b) ? (a) : (b) )
53 #endif
54
55 /*--------------------------------------------------------------------------
56 * SRA_Read
57 */
58
59 const char * sequence_col_specs [] =
60 {
61 "(INSDC:dna:text)READ",
62 "READ_TYPE",
63 "(INSDC:quality:phred)QUALITY",
64 "(INSDC:coord:len)READ_LEN",
65 "NAME",
66 "SPOT_GROUP",
67 "PRIMARY_ALIGNMENT_ID",
68 "(U64)SPOT_COUNT",
69 "(INSDC:dna:text)CMP_READ",
70 };
71
72 static NGS_Read_vt NGS_Read_vt_inst =
73 {
74 {
75 {
76 /* NGS_Refcount */
77 SRA_ReadWhack
78 },
79
80 /* NGS_Fragment */
81 SRA_FragmentGetId,
82 SRA_FragmentGetSequence,
83 SRA_FragmentGetQualities,
84 SRA_FragmentIsPaired,
85 SRA_FragmentIsAligned,
86 SRA_FragmentNext
87 },
88
89 /* NGS_Read */
90 SRA_ReadGetId,
91 SRA_ReadGetName,
92 SRA_ReadGetReadGroup,
93 SRA_ReadGetCategory,
94 SRA_ReadGetSequence,
95 SRA_ReadGetQualities,
96 SRA_ReadNumFragments,
97 SRA_ReadFragIsAligned,
98 SRA_ReadIteratorNext,
99 SRA_ReadIteratorGetCount,
100 };
101
102 /* Init
103 */
104 static
SRA_ReadInit(ctx_t ctx,SRA_Read * self,const char * clsname,const char * instname,const NGS_String * run_name)105 void SRA_ReadInit ( ctx_t ctx, SRA_Read * self, const char *clsname, const char *instname, const NGS_String * run_name )
106 {
107 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcConstructing );
108
109 if ( self == NULL )
110 INTERNAL_ERROR ( xcParamNull, "bad object reference" );
111 else
112 {
113 TRY ( NGS_ReadInit ( ctx, & self -> dad, & NGS_Read_vt_inst, clsname, instname ) )
114 {
115 TRY ( self -> run_name = NGS_StringDuplicate ( run_name, ctx ) )
116 {
117 self -> wants_full = true;
118 self -> wants_partial = true;
119 self -> wants_unaligned = true;
120 }
121 }
122 }
123 }
124
125 static
SRA_ReadIteratorInit(ctx_t ctx,SRA_Read * self,const char * clsname,const char * instname,const NGS_String * run_name,bool wants_full,bool wants_partial,bool wants_unaligned)126 void SRA_ReadIteratorInit ( ctx_t ctx,
127 SRA_Read * self,
128 const char *clsname,
129 const char *instname,
130 const NGS_String * run_name,
131 bool wants_full,
132 bool wants_partial,
133 bool wants_unaligned )
134 {
135 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcConstructing );
136
137 if ( self == NULL )
138 INTERNAL_ERROR ( xcParamNull, "bad object reference" );
139 else
140 {
141 TRY ( NGS_ReadIteratorInit ( ctx, & self -> dad, & NGS_Read_vt_inst, clsname, instname ) )
142 {
143 TRY ( self -> run_name = NGS_StringDuplicate ( run_name, ctx ) )
144 {
145 self -> wants_full = wants_full;
146 self -> wants_partial = wants_partial;
147 self -> wants_unaligned = wants_unaligned;
148 }
149 }
150 }
151 }
152
153 /* Whack
154 */
SRA_ReadWhack(SRA_Read * self,ctx_t ctx)155 void SRA_ReadWhack ( SRA_Read * self, ctx_t ctx )
156 {
157 NGS_CursorRelease ( self -> curs, ctx );
158
159 NGS_StringRelease ( self -> group_name, ctx );
160 NGS_StringRelease ( self -> run_name, ctx );
161 }
162
163 /* Release
164 * release reference
165 */
SRA_ReadRelease(const SRA_Read * self,ctx_t ctx)166 void SRA_ReadRelease ( const SRA_Read * self, ctx_t ctx )
167 {
168 if ( self != NULL )
169 {
170 NGS_ReadRelease ( & self -> dad, ctx );
171 }
172 }
173
174 /* Duplicate
175 * duplicate reference
176 */
SRA_ReadDuplicate(const SRA_Read * self,ctx_t ctx)177 SRA_Read * SRA_ReadDuplicate ( const SRA_Read * self, ctx_t ctx )
178 {
179 return NGS_RefcountDuplicate ( NGS_ReadToRefcount ( & self -> dad ), ctx );
180 }
181
SRA_ReadIteratorInitFragment(SRA_Read * self,ctx_t ctx)182 void SRA_ReadIteratorInitFragment ( SRA_Read * self, ctx_t ctx )
183 {
184 const void * base;
185 uint32_t elem_bits, boff, row_len;
186
187 /* read from READ_TYPE must succeed */
188 TRY ( NGS_CursorCellDataDirect ( self -> curs, ctx, self -> cur_row, seq_READ_TYPE, & elem_bits, & base, & boff, & row_len ) )
189 {
190 assert ( elem_bits == 8 );
191 assert ( boff == 0 );
192 self -> READ_TYPE = base;
193
194 TRY ( NGS_CursorCellDataDirect ( self -> curs, ctx, self -> cur_row, seq_READ_LEN, & elem_bits, & base, & boff, & row_len ) )
195 {
196 uint32_t i;
197
198 assert ( elem_bits == 32 );
199 assert ( boff == 0 );
200 self -> READ_LEN = base;
201 self -> frag_max = row_len;
202
203 /* naked hackery to quickly scan types */
204 assert ( READ_TYPE_TECHNICAL == 0 );
205 assert ( READ_TYPE_BIOLOGICAL == 1 );
206
207 /* NB - should also be taking READ_FILTER into account */
208 for ( i = 0; i < row_len; ++ i )
209 {
210 if ( self -> READ_LEN [ i ] != 0 )
211 {
212 self -> bio_frags += self -> READ_TYPE [ i ] & READ_TYPE_BIOLOGICAL;
213 }
214 }
215 }
216 }
217 }
218
SRA_ReadMake(ctx_t ctx,const NGS_Cursor * curs,int64_t readId,const struct NGS_String * run_name)219 NGS_Read * SRA_ReadMake ( ctx_t ctx, const NGS_Cursor * curs, int64_t readId, const struct NGS_String * run_name )
220 {
221 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcConstructing );
222
223 SRA_Read * ref;
224
225 assert ( curs != NULL );
226
227 ref = calloc ( 1, sizeof * ref );
228 if ( ref == NULL )
229 SYSTEM_ERROR ( xcNoMemory, "allocating SRA_Read(%lu) on '%.*s'", readId, NGS_StringSize ( run_name, ctx ), NGS_StringData ( run_name, ctx ) );
230 else
231 {
232 #if _DEBUGGING
233 char instname [ 256 ];
234 string_printf ( instname, sizeof instname, NULL, "%.*s(%lu)", NGS_StringSize ( run_name, ctx ), NGS_StringData ( run_name, ctx ), readId );
235 instname [ sizeof instname - 1 ] = 0;
236 #else
237 const char *instname = "";
238 #endif
239 TRY ( SRA_ReadInit ( ctx, ref, "SRA_Read", instname, run_name ) )
240 {
241 uint64_t row_count = NGS_CursorGetRowCount ( curs, ctx );
242
243 /* validate the requested rowId and seek to it */
244 if ( readId <= 0 || (uint64_t)readId > row_count )
245 {
246 INTERNAL_ERROR ( xcCursorAccessFailed, "rowId ( %li ) out of range for %.*s", readId, NGS_StringSize ( run_name, ctx ), NGS_StringData ( run_name, ctx ) );
247 }
248 else
249 {
250 ref -> curs = NGS_CursorDuplicate ( curs, ctx );
251 ref -> cur_row = readId;
252 TRY ( SRA_ReadIteratorInitFragment ( ref, ctx ) )
253 {
254 ref -> row_max = readId + 1;
255 ref -> row_count = 1;
256 ref -> seen_first = true;
257 return & ref -> dad;
258 }
259 }
260
261 SRA_ReadRelease ( ref, ctx );
262 return NULL;
263 }
264 free ( ref );
265 }
266
267 return NULL;
268 }
269
270 /* GetReadId
271 */
SRA_ReadGetId(SRA_Read * self,ctx_t ctx)272 NGS_String * SRA_ReadGetId ( SRA_Read * self, ctx_t ctx )
273 {
274 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcAccessing );
275
276 assert ( self != NULL );
277
278 if ( ! self -> seen_first )
279 {
280 USER_ERROR ( xcIteratorUninitialized, "Read accessed before a call to ReadIteratorNext()" );
281 return NULL;
282 }
283
284 /* if current row is valid, read data */
285 if ( self -> cur_row < self -> row_max )
286 return NGS_IdMake ( ctx, self -> run_name, NGSObject_Read, self -> cur_row );
287
288 USER_ERROR ( xcCursorExhausted, "No more rows available" );
289 return NULL;
290 }
291
292 /* GetReadName
293 */
SRA_ReadGetName(SRA_Read * self,ctx_t ctx)294 NGS_String * SRA_ReadGetName ( SRA_Read * self, ctx_t ctx )
295 {
296 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
297
298 assert ( self != NULL );
299
300 if ( ! self -> seen_first )
301 {
302 USER_ERROR ( xcIteratorUninitialized, "Read accessed before a call to ReadIteratorNext()" );
303 return NULL;
304 }
305 else
306 {
307 NGS_String * ret;
308 ON_FAIL ( ret = NGS_CursorGetString( self -> curs, ctx, self -> cur_row, seq_NAME ) )
309 {
310 if ( GetRCObject ( ctx -> rc ) == (int)rcColumn && GetRCState ( ctx -> rc ) == rcNotFound )
311 { /* no NAME column; synthesize a read name based on run_name and row_id */
312 CLEAR ();
313 ret = NGS_IdMake ( ctx, self -> run_name, NGSObject_Read, self -> cur_row );
314 }
315 }
316
317 return ret;
318 }
319 }
320
321 /* GetReadGroup
322 */
SRA_ReadGetReadGroup(SRA_Read * self,ctx_t ctx)323 NGS_String * SRA_ReadGetReadGroup ( SRA_Read * self, ctx_t ctx )
324 {
325 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
326
327 assert ( self != NULL );
328
329 if ( ! self -> seen_first )
330 {
331 USER_ERROR ( xcIteratorUninitialized, "Read accessed before a call to ReadIteratorNext()" );
332 return NULL;
333 }
334
335 return NGS_CursorGetString ( self -> curs, ctx, self -> cur_row, seq_GROUP );
336 }
337
SRA_ReadGetCategory(const SRA_Read * self,ctx_t ctx)338 enum NGS_ReadCategory SRA_ReadGetCategory ( const SRA_Read * self, ctx_t ctx )
339 {
340 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcAccessing );
341
342 assert ( self != NULL );
343
344 if ( ! self -> seen_first )
345 {
346 USER_ERROR ( xcIteratorUninitialized, "Read accessed before a call to ReadIteratorNext()" );
347 return NGS_ReadCategory_unaligned;
348 }
349
350 if ( self -> cur_row < self -> row_max )
351 {
352 const void * base;
353 uint32_t elem_bits, boff, row_len;
354 ON_FAIL ( NGS_CursorCellDataDirect ( self -> curs, ctx, self -> cur_row, seq_PRIMARY_ALIGNMENT_ID, & elem_bits, & base, & boff, & row_len ) )
355 {
356 CLEAR();
357 return NGS_ReadCategory_unaligned;
358 }
359
360 {
361 uint32_t i;
362 bool seen_aligned = false;
363 bool seen_unaligned = false;
364 const int64_t * orig = base;
365 assert(elem_bits == 64);
366 for ( i = 0; i < row_len; ++ i )
367 {
368 if (orig[i] == 0)
369 {
370 if (!seen_unaligned)
371 seen_unaligned = true;
372 }
373 else
374 {
375 if (!seen_aligned)
376 seen_aligned = true;
377 }
378 }
379 if (seen_aligned)
380 {
381 return seen_unaligned ? NGS_ReadCategory_partiallyAligned : NGS_ReadCategory_fullyAligned;
382 }
383 else
384 { /* no aligned fragments */
385 return NGS_ReadCategory_unaligned;
386 }
387 }
388 }
389 USER_ERROR ( xcCursorExhausted, "No more rows available" );
390 return NGS_ReadCategory_unaligned;
391 }
392
393 /* GetReadSequence
394 */
SRA_ReadGetSequence(SRA_Read * self,ctx_t ctx,uint64_t offset,uint64_t size)395 NGS_String * SRA_ReadGetSequence ( SRA_Read * self, ctx_t ctx, uint64_t offset, uint64_t size )
396 {
397 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
398 NGS_String * seq;
399
400 assert ( self != NULL );
401
402 if ( ! self -> seen_first )
403 {
404 USER_ERROR ( xcIteratorUninitialized, "Read accessed before a call to ReadIteratorNext()" );
405 return NULL;
406 }
407
408 TRY ( seq = NGS_CursorGetString ( self -> curs, ctx, self -> cur_row, seq_READ ) )
409 {
410 NGS_String * sub;
411 TRY ( sub = NGS_StringSubstrOffsetSize ( seq, ctx, offset, size ) )
412 {
413 NGS_StringRelease ( seq, ctx );
414 seq = sub;
415 }
416 }
417
418 return seq;
419 }
420
421 /* GetReadQualities
422 * GetReadSubQualities
423 */
424 static
GetReadQualities(SRA_Read * self,ctx_t ctx)425 NGS_String * GetReadQualities ( SRA_Read * self, ctx_t ctx )
426 {
427 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
428
429 /* if current row is valid, read data */
430 if ( self -> cur_row < self -> row_max )
431 {
432 const void * base;
433 uint32_t elem_bits, boff, row_len;
434 TRY ( NGS_CursorCellDataDirect ( self -> curs, ctx, self -> cur_row, seq_QUALITY, & elem_bits, & base, & boff, & row_len ) )
435 {
436 NGS_String * new_data = NULL;
437
438 assert ( elem_bits == 8 );
439 assert ( boff == 0 );
440
441 { /* convert to ascii-33 */
442 char * copy = malloc ( row_len + 1 );
443 if ( copy == NULL )
444 SYSTEM_ERROR ( xcNoMemory, "allocating %u bytes for QUALITY row %ld", row_len + 1, self -> cur_row );
445 else
446 {
447 uint32_t i;
448 const uint8_t * orig = base;
449 for ( i = 0; i < row_len; ++ i )
450 copy [ i ] = ( char ) ( orig [ i ] + 33 );
451 copy [ i ] = 0;
452
453 new_data = NGS_StringMakeOwned ( ctx, copy, row_len );
454 if ( FAILED () )
455 free ( copy );
456 }
457 }
458
459 if ( ! FAILED () )
460 {
461 return new_data;
462 }
463 }
464 }
465
466 return NULL;
467 }
468
SRA_ReadGetQualities(SRA_Read * self,ctx_t ctx,uint64_t offset,uint64_t size)469 NGS_String * SRA_ReadGetQualities ( SRA_Read * self, ctx_t ctx, uint64_t offset, uint64_t size )
470 {
471 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
472 NGS_String * qual;
473
474 assert ( self != NULL );
475
476 if ( ! self -> seen_first )
477 {
478 USER_ERROR ( xcIteratorUninitialized, "Read accessed before a call to ReadIteratorNext()" );
479 return NULL;
480 }
481
482 TRY ( qual = GetReadQualities ( self, ctx ) )
483 {
484 NGS_String * sub;
485 TRY ( sub = NGS_StringSubstrOffsetSize ( qual, ctx, offset, size ) )
486 {
487 NGS_StringRelease ( qual, ctx );
488 qual = sub;
489 }
490 }
491
492 return qual;
493 }
494
495 /* NumFragments
496 */
SRA_ReadNumFragments(SRA_Read * self,ctx_t ctx)497 uint32_t SRA_ReadNumFragments ( SRA_Read * self, ctx_t ctx )
498 {
499 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
500
501 assert ( self != NULL );
502
503 if ( ! self -> seen_first )
504 {
505 USER_ERROR ( xcIteratorUninitialized, "Read accessed before a call to ReadIteratorNext()" );
506 return 0;
507 }
508
509 if ( self -> cur_row >= self -> row_max )
510 {
511 USER_ERROR ( xcCursorExhausted, "No more rows available" );
512 return false;
513 }
514
515 return self -> bio_frags;
516 }
517
518 /* FragIsAligned
519 */
SRA_ReadFragIsAligned(SRA_Read * self,ctx_t ctx,uint32_t frag_idx)520 bool SRA_ReadFragIsAligned ( SRA_Read * self, ctx_t ctx, uint32_t frag_idx )
521 {
522 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
523
524 assert ( self != NULL );
525
526 if ( ! self -> seen_first )
527 {
528 USER_ERROR ( xcIteratorUninitialized, "Read accessed before a call to ReadIteratorNext()" );
529 return 0;
530 }
531
532 if ( self -> cur_row >= self -> row_max )
533 {
534 USER_ERROR ( xcCursorExhausted, "No more rows available" );
535 return false;
536 }
537
538 if ( frag_idx >= self -> bio_frags )
539 {
540 USER_ERROR ( xcIntegerOutOfBounds, "bad fragment index" );
541 return false;
542 }
543
544 return false;
545 }
546
547 /*--------------------------------------------------------------------------
548 * NGS_ReadIterator
549 */
550
551 /* Make
552 */
SRA_ReadIteratorMake(ctx_t ctx,const NGS_Cursor * curs,const NGS_String * run_name,bool wants_full,bool wants_partial,bool wants_unaligned)553 NGS_Read * SRA_ReadIteratorMake ( ctx_t ctx,
554 const NGS_Cursor * curs,
555 const NGS_String * run_name,
556 bool wants_full,
557 bool wants_partial,
558 bool wants_unaligned )
559 {
560 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcConstructing );
561
562 SRA_Read * ref;
563
564 assert ( curs != NULL );
565
566 ref = calloc ( 1, sizeof * ref );
567 if ( ref == NULL )
568 SYSTEM_ERROR ( xcNoMemory, "allocating NGS_ReadIterator on '%.*s'", NGS_StringSize ( run_name, ctx ), NGS_StringData ( run_name, ctx ) );
569 else
570 {
571 #if _DEBUGGING
572 char instname [ 256 ];
573 string_printf ( instname, sizeof instname, NULL, "%.*s", NGS_StringSize ( run_name, ctx ), NGS_StringData ( run_name, ctx ) );
574 instname [ sizeof instname - 1 ] = 0;
575 #else
576 const char *instname = "";
577 #endif
578 TRY ( SRA_ReadIteratorInit ( ctx, ref, "NGS_ReadIterator", instname, run_name, wants_full, wants_partial, wants_unaligned ) )
579 {
580 ref -> curs = NGS_CursorDuplicate ( curs, ctx );
581 TRY ( NGS_CursorGetRowRange ( ref -> curs, ctx, & ref -> cur_row, & ref -> row_count ) )
582 {
583 ref -> row_max = ref -> cur_row + ref -> row_count;
584 return & ref -> dad;
585 }
586 SRA_ReadRelease ( ref, ctx );
587 return NULL;
588 }
589
590 free ( ref );
591 }
592
593 return NULL;
594 }
595
596 /* MakeRange
597 */
SRA_ReadIteratorMakeRange(ctx_t ctx,const NGS_Cursor * curs,const NGS_String * run_name,uint64_t first,uint64_t count,bool wants_full,bool wants_partial,bool wants_unaligned)598 NGS_Read * SRA_ReadIteratorMakeRange ( ctx_t ctx,
599 const NGS_Cursor * curs,
600 const NGS_String * run_name,
601 uint64_t first,
602 uint64_t count,
603 bool wants_full,
604 bool wants_partial,
605 bool wants_unaligned )
606 {
607 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcConstructing );
608
609 SRA_Read * ref;
610
611 assert ( curs != NULL );
612
613 ref = calloc ( 1, sizeof * ref );
614 if ( ref == NULL )
615 SYSTEM_ERROR ( xcNoMemory, "allocating NGS_ReadIterator on '%.*s'", NGS_StringSize ( run_name, ctx ), NGS_StringData ( run_name, ctx ) );
616 else
617 {
618 #if _DEBUGGING
619 char instname [ 256 ];
620 string_printf ( instname, sizeof instname, NULL, "%.*s", NGS_StringSize ( run_name, ctx ), NGS_StringData ( run_name, ctx ) );
621 instname [ sizeof instname - 1 ] = 0;
622 #else
623 const char *instname = "";
624 #endif
625 TRY ( SRA_ReadIteratorInit ( ctx, ref, "NGS_ReadIterator", instname, run_name, wants_full, wants_partial, wants_unaligned ) )
626 {
627 ref -> curs = NGS_CursorDuplicate ( curs, ctx );
628 TRY ( NGS_CursorGetRowRange ( ref -> curs, ctx, & ref -> cur_row, & ref -> row_count ) )
629 {
630 ref -> row_max = min ( first + count, ref -> cur_row + ref -> row_count );
631 ref -> cur_row = first;
632 return & ref -> dad;
633 }
634 SRA_ReadRelease ( ref, ctx );
635 return NULL;
636 }
637
638 free ( ref );
639 }
640
641 return NULL;
642 }
643
644 /* MakeReadGroup
645 */
SRA_ReadIteratorMakeReadGroup(ctx_t ctx,const NGS_Cursor * curs,const NGS_String * run_name,const NGS_String * group_name,uint64_t first,uint64_t count,bool wants_full,bool wants_partial,bool wants_unaligned)646 NGS_Read * SRA_ReadIteratorMakeReadGroup ( ctx_t ctx,
647 const NGS_Cursor * curs,
648 const NGS_String * run_name,
649 const NGS_String * group_name,
650 uint64_t first,
651 uint64_t count,
652 bool wants_full,
653 bool wants_partial,
654 bool wants_unaligned )
655 {
656 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcConstructing );
657
658 TRY ( SRA_Read * ref = (SRA_Read*) SRA_ReadIteratorMakeRange ( ctx,
659 curs,
660 run_name,
661 first,
662 count,
663 wants_full,
664 wants_partial,
665 wants_unaligned ) )
666 {
667 TRY ( ref -> group_name = NGS_StringDuplicate ( group_name, ctx ) )
668 {
669 return & ref -> dad;
670 }
671 SRA_ReadWhack ( ref, ctx );
672 }
673 return NULL;
674 }
675
676 /* Next
677 */
SRA_ReadIteratorNext(SRA_Read * self,ctx_t ctx)678 bool SRA_ReadIteratorNext ( SRA_Read * self, ctx_t ctx )
679 {
680 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcConstructing );
681
682 assert ( self != NULL );
683
684 /* reset fragment */
685 self -> seen_first_frag = false;
686 self -> seen_last_frag = false;
687
688 self -> cur_frag = 0;
689 self -> bio_frags = 0;
690 self -> frag_idx = 0;
691 self -> frag_max = 0;
692 self -> frag_start = 0;
693 self -> frag_len = 0;
694
695 self -> READ_TYPE = NULL;
696 self -> READ_LEN = NULL;
697
698 if ( self -> seen_first )
699 { /* move to next row */
700 ++ self -> cur_row;
701 }
702 else
703 {
704 self -> seen_first = true;
705 }
706
707 while ( self -> cur_row < self -> row_max )
708 {
709 /* work the category filter */
710 if ( ! self -> wants_full ||
711 ! self -> wants_partial ||
712 ! self -> wants_unaligned )
713 {
714 ON_FAIL ( enum NGS_ReadCategory cat = SRA_ReadGetCategory ( self, ctx ) )
715 return false;
716
717 if ( ( cat == NGS_ReadCategory_fullyAligned && ! self -> wants_full )
718 ||
719 ( cat == NGS_ReadCategory_partiallyAligned && ! self -> wants_partial )
720 ||
721 ( cat == NGS_ReadCategory_unaligned && ! self -> wants_unaligned ) )
722 {
723 ++ self -> cur_row;
724 continue;
725 }
726 }
727
728 /* work the read group filter if required */
729 if ( self -> group_name != NULL )
730 {
731 uint32_t size;
732
733 ON_FAIL ( NGS_String* group = NGS_CursorGetString ( self -> curs, ctx, self -> cur_row, seq_GROUP ) )
734 return false;
735
736 size = ( uint32_t ) NGS_StringSize ( group, ctx );
737 if ( string_cmp ( NGS_StringData ( self -> group_name, ctx ),
738 NGS_StringSize ( self -> group_name, ctx ),
739 NGS_StringData ( group, ctx ),
740 size,
741 size ) != 0 )
742 {
743 NGS_StringRelease ( group, ctx );
744 ++ self -> cur_row;
745 continue;
746 }
747 NGS_StringRelease ( group, ctx );
748 }
749
750 TRY ( SRA_ReadIteratorInitFragment ( self, ctx ) )
751 {
752 return true;
753 }
754
755 break;
756 }
757
758 return false;
759 }
760
761 /* GetCount
762 * TEMPORARY
763 */
SRA_ReadIteratorGetCount(const SRA_Read * self,ctx_t ctx)764 uint64_t SRA_ReadIteratorGetCount ( const SRA_Read * self, ctx_t ctx )
765 {
766 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcConstructing );
767
768 assert ( self != NULL );
769 return self -> row_count;
770 }
771
SRA_FragmentGetId(SRA_Read * self,ctx_t ctx)772 NGS_String * SRA_FragmentGetId ( SRA_Read * self, ctx_t ctx )
773 {
774 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcConstructing );
775
776 assert ( self != NULL );
777 if ( ! self -> seen_first_frag )
778 {
779 USER_ERROR ( xcIteratorUninitialized, "Fragment accessed before a call to FragmentIteratorNext()" );
780 return NULL;
781 }
782 if ( self -> seen_last_frag )
783 {
784 USER_ERROR ( xcCursorExhausted, "No more rows available" );
785 return NULL;
786 }
787
788 return NGS_IdMakeFragment ( ctx, self -> run_name, false, self -> cur_row, self -> cur_frag );
789 }
790
791 static
GetFragmentString(const SRA_Read * self,ctx_t ctx,NGS_String * str)792 NGS_String * GetFragmentString ( const SRA_Read * self, ctx_t ctx, NGS_String * str )
793 {
794 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
795
796 assert ( self != NULL );
797 if ( ! self -> seen_first_frag )
798 {
799 USER_ERROR ( xcIteratorUninitialized, "Fragment accessed before a call to FragmentIteratorNext()" );
800 return NULL;
801 }
802 if ( self -> seen_last_frag )
803 {
804 USER_ERROR ( xcCursorExhausted, "No more rows available" );
805 return NULL;
806 }
807
808 if ( self -> cur_row < self -> row_max )
809 {
810 TRY ( NGS_String * frag = NGS_StringSubstrOffsetSize ( str, ctx, self -> frag_start, self -> frag_len ) )
811 {
812 return frag;
813 }
814 }
815
816 return NULL;
817 }
818
SRA_FragmentGetSequence(SRA_Read * self,ctx_t ctx,uint64_t offset,uint64_t length)819 struct NGS_String * SRA_FragmentGetSequence ( SRA_Read * self, ctx_t ctx, uint64_t offset, uint64_t length )
820 {
821 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcConstructing );
822 NGS_String * ret = NULL;
823
824 assert ( self != NULL );
825 if ( ! self -> seen_first_frag )
826 {
827 USER_ERROR ( xcIteratorUninitialized, "Fragment accessed before a call to FragmentIteratorNext()" );
828 return NULL;
829 }
830 if ( self -> seen_last_frag )
831 {
832 USER_ERROR ( xcCursorExhausted, "No more rows available" );
833 return NULL;
834 }
835
836 {
837 TRY ( NGS_String * read = NGS_CursorGetString ( self -> curs, ctx, self -> cur_row, seq_READ ) )
838 {
839 TRY ( NGS_String * seq = GetFragmentString ( self, ctx, read ) )
840 {
841 ret = NGS_StringSubstrOffsetSize ( seq, ctx, offset, length );
842 NGS_StringRelease ( seq, ctx );
843 }
844 NGS_StringRelease ( read, ctx );
845 }
846 }
847 return ret;
848 }
849
SRA_FragmentGetQualities(SRA_Read * self,ctx_t ctx,uint64_t offset,uint64_t length)850 struct NGS_String * SRA_FragmentGetQualities ( SRA_Read * self, ctx_t ctx, uint64_t offset, uint64_t length )
851 {
852 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcConstructing );
853 NGS_String * ret = NULL;
854
855 assert ( self != NULL );
856 if ( ! self -> seen_first_frag )
857 {
858 USER_ERROR ( xcIteratorUninitialized, "Fragment accessed before a call to FragmentIteratorNext()" );
859 return NULL;
860 }
861 if ( self -> seen_last_frag )
862 {
863 USER_ERROR ( xcCursorExhausted, "No more rows available" );
864 return NULL;
865 }
866
867 {
868 TRY ( NGS_String * readQual = GetReadQualities ( self, ctx ) )
869 {
870 TRY ( NGS_String * fragQual = GetFragmentString ( self, ctx, readQual ) )
871 {
872 ret = NGS_StringSubstrOffsetSize ( fragQual, ctx, offset, length );
873 NGS_StringRelease ( fragQual, ctx );
874 }
875 NGS_StringRelease ( readQual, ctx );
876 }
877 }
878 return ret;
879 }
880
SRA_FragmentIsPaired(SRA_Read * self,ctx_t ctx)881 bool SRA_FragmentIsPaired ( SRA_Read * self, ctx_t ctx )
882 {
883 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcAccessing );
884
885 assert ( self != NULL );
886 if ( ! self -> seen_first_frag )
887 {
888 USER_ERROR ( xcIteratorUninitialized, "Fragment accessed before a call to FragmentIteratorNext()" );
889 return false;
890 }
891 if ( self -> seen_last_frag )
892 {
893 USER_ERROR ( xcCursorExhausted, "No more rows available" );
894 return false;
895 }
896
897 return self -> bio_frags > 1;
898 }
899
SRA_FragmentIsAligned(SRA_Read * self,ctx_t ctx)900 bool SRA_FragmentIsAligned ( SRA_Read * self, ctx_t ctx )
901 {
902 assert ( self != NULL );
903 return false;
904 }
905
SRA_FragmentNext(SRA_Read * self,ctx_t ctx)906 bool SRA_FragmentNext ( SRA_Read * self, ctx_t ctx )
907 {
908 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcConstructing );
909
910 assert ( self != NULL );
911
912 if ( ! self -> seen_first )
913 {
914 USER_ERROR ( xcIteratorUninitialized, "Read accessed before a call to ReadIteratorNext()" );
915 return false;
916 }
917
918 if ( self -> seen_first_frag )
919 { /* move to next fragment */
920 ++ self -> cur_frag;
921 ++ self -> frag_idx;
922 }
923
924 /* advance to next non-empty biological fragment */
925 for ( self -> seen_first_frag = true; self -> frag_idx < self -> frag_max; ++ self -> frag_idx )
926 {
927 if ( self -> READ_LEN [ self -> frag_idx ] != 0 )
928 {
929 /* get coordinates */
930 self -> frag_start += self -> frag_len;
931 self -> frag_len = self -> READ_LEN [ self -> frag_idx ];
932
933 /* test for biological fragment */
934 assert ( READ_TYPE_TECHNICAL == 0 );
935 assert ( READ_TYPE_BIOLOGICAL == 1 );
936 if ( ( self -> READ_TYPE [ self -> frag_idx ] & READ_TYPE_BIOLOGICAL ) != 0 )
937 return true;
938 }
939 }
940
941 self -> seen_last_frag = true;
942 return false;
943 }
944
945