1 /*===========================================================================
2 *
3 * PUBLIC DOMAIN NOTICE
4 * National Center for Biotechnology Information
5 *
6 * This software/database is a "United States Government Work" under the
7 * terms of the United States Copyright Act. It was written as part of
8 * the author's official duties as a United States Government employee and
9 * thus cannot be copyrighted. This software/database is freely available
10 * to the public for use. The National Library of Medicine and the U.S.
11 * Government have not placed any restriction on its use or reproduction.
12 *
13 * Although all reasonable efforts have been taken to ensure the accuracy
14 * and reliability of the software and data, the NLM and the U.S.
15 * Government do not and cannot warrant the performance or results that
16 * may be obtained by using this software or data. The NLM and the U.S.
17 * Government disclaim all warranties, express or implied, including
18 * warranties of performance, merchantability or fitness for any particular
19 * purpose.
20 *
21 * Please cite the author in any work or product based on this material.
22 *
23 * ===========================================================================
24 *
25 */
26
27 #include "CSRA1_Alignment.h"
28
29 typedef struct CSRA1_Alignment CSRA1_Alignment;
30 #define NGS_ALIGNMENT CSRA1_Alignment
31
32 #include "NGS_Alignment.h"
33 #include "NGS_ReadCollection.h"
34 #include "NGS_Refcount.h"
35 #include "NGS_Read.h"
36
37 #include "NGS_Id.h"
38 #include "NGS_String.h"
39 #include "NGS_Cursor.h"
40
41 #include "CSRA1_ReadCollection.h"
42
43 #include <sysalloc.h>
44
45 #include <vdb/cursor.h>
46 #include <vdb/vdb-priv.h>
47
48 #include <klib/printf.h>
49 #include <klib/rc.h>
50
51 #include <kfc/ctx.h>
52 #include <kfc/rsrc.h>
53 #include <kfc/except.h>
54 #include <kfc/xc.h>
55
56 #include <limits.h>
57
58 #ifndef min
59 # define min(a,b) ( (a) < (b) ? (a) : (b) )
60 #endif
61
62 /*--------------------------------------------------------------------------
63 * CSRA1_Alignment
64 */
65
66 /* align_col_specs must be kept in sync with enum AlignmentTableColumns */
67 static const char * align_col_specs [] =
68 {
69 "(I32)MAPQ",
70 "(INSDC:SRA:read_filter)READ_FILTER",
71 "(ascii)CIGAR_LONG",
72 "(ascii)CIGAR_SHORT",
73 "(ascii)CLIPPED_CIGAR_LONG",
74 "(ascii)CLIPPED_CIGAR_SHORT",
75 "(INSDC:quality:phred)CLIPPED_QUALITY",
76 "(INSDC:dna:text)CLIPPED_READ",
77 "(INSDC:coord:len)LEFT_SOFT_CLIP",
78 "(INSDC:coord:len)RIGHT_SOFT_CLIP",
79 "(INSDC:quality:phred)QUALITY",
80 "(INSDC:dna:text)RAW_READ",
81 "(INSDC:dna:text)READ",
82 "(I64)REF_ID",
83 "(INSDC:coord:len)REF_LEN",
84 "(ascii)REF_SEQ_ID", /* was REF_NAME changed March 23 2015 */
85 "(bool)REF_ORIENTATION",
86 "(INSDC:coord:zero)REF_POS",
87 "(INSDC:dna:text)REF_READ",
88 "(INSDC:coord:one)SEQ_READ_ID",
89 "(I64)SEQ_SPOT_ID",
90 "(ascii)SPOT_GROUP",
91 "(I32)TEMPLATE_LEN",
92 "(ascii)RNA_ORIENTATION",
93 "(I64)MATE_ALIGN_ID",
94 "(ascii)MATE_REF_SEQ_ID",
95 "(ascii)MATE_REF_NAME", /* to be used if MATE_REF_SEQ_ID is absent */
96 "(bool)MATE_REF_ORIENTATION",
97 "(bool)HAS_REF_OFFSET",
98 "(I32)REF_OFFSET"
99 };
100 /* Made changes to align_col_specs? - Make the same in enum AlignmentTableColumns! */
101
102 /* enum AlignmentTableColumns must be kept in sync with align_col_specs */
103 enum AlignmentTableColumns
104 {
105 align_MAPQ,
106 align_READ_FILTER,
107 align_CIGAR_LONG,
108 align_CIGAR_SHORT,
109 align_CLIPPED_CIGAR_LONG,
110 align_CLIPPED_CIGAR_SHORT,
111 align_CLIPPED_QUALITY,
112 align_CLIPPED_READ,
113 align_LEFT_SOFT_CLIP,
114 align_RIGHT_SOFT_CLIP,
115 align_QUALITY,
116 align_RAW_READ,
117 align_READ,
118 align_REF_ID,
119 align_REF_LEN,
120 align_REF_SEQ_ID,
121 align_REF_ORIENTATION,
122 align_REF_POS,
123 align_REF_READ,
124 align_SEQ_READ_ID,
125 align_SEQ_SPOT_ID,
126 align_SPOT_GROUP,
127 align_TEMPLATE_LEN,
128 align_RNA_ORIENTATION,
129 align_MATE_ALIGN_ID,
130 align_MATE_REF_SEQ_ID,
131 align_MATE_REF_NAME,
132 align_MATE_REF_ORIENTATION,
133 align_HAS_REF_OFFSET,
134 align_REF_OFFSET,
135
136 align_NUM_COLS
137 };
138 /* Made changes to enum AlignmentTableColumns? - Make the same in align_col_specs! */
139
140
CSRA1_AlignmentMakeDb(ctx_t ctx,struct VDatabase const * db,struct NGS_String const * run_name,char const * table_name)141 struct NGS_Cursor const* CSRA1_AlignmentMakeDb ( ctx_t ctx,
142 struct VDatabase const* db,
143 struct NGS_String const* run_name,
144 char const* table_name )
145 {
146 return NGS_CursorMakeDb ( ctx, db, run_name, table_name, align_col_specs, align_NUM_COLS );
147 }
148
149 struct CSRA1_Alignment
150 {
151 NGS_Refcount dad;
152 struct CSRA1_ReadCollection * coll;
153 const NGS_String * run_name;
154
155 int64_t cur_row;
156 int64_t row_max;
157
158 const NGS_Cursor * primary_curs;
159 const NGS_Cursor * secondary_curs;
160 NGS_String * col_data [ align_NUM_COLS ];
161
162 uint64_t id_offset;
163
164 bool seen_first;
165 bool in_primary;
166
167 /* for use in slices */
168 int64_t secondary_start;
169 int64_t secondary_max;
170
171 /* data to be accessed via CellData */
172 void const* cell_data [ align_NUM_COLS ];
173 uint32_t cell_len [ align_NUM_COLS ];
174 };
175
CSRA1_AlignmentGetCellData(CSRA1_Alignment * self,ctx_t ctx,uint32_t col_idx)176 static void const* CSRA1_AlignmentGetCellData ( CSRA1_Alignment * self,
177 ctx_t ctx,
178 uint32_t col_idx
179 )
180 {
181 if ( self -> cell_data [ col_idx ] == NULL )
182 {
183 assert ( self -> cell_len [ col_idx ] == 0 );
184
185 if ( ! self -> seen_first )
186 {
187 USER_ERROR ( xcIteratorUninitialized, "Alignment accessed before a call to AlignmentIteratorNext()" );
188 return NULL;
189 }
190
191 NGS_CursorCellDataDirect ( self -> in_primary ? self->primary_curs : self->secondary_curs,
192 ctx,
193 self->cur_row,
194 col_idx,
195 NULL,
196 & self -> cell_data [ col_idx ],
197 NULL,
198 & self -> cell_len [ col_idx ]
199 );
200
201 if ( FAILED() )
202 {
203 self -> cell_data [ col_idx ] = NULL;
204 self -> cell_len [ col_idx ] = 0;
205 }
206 }
207
208 return self -> cell_data [ col_idx ];
209 }
210
211 #define GetCursor( self ) ( self -> in_primary ? self -> primary_curs : self -> secondary_curs )
212
213 /* Whack
214 */
215 static
CSRA1_AlignmentWhack(CSRA1_Alignment * self,ctx_t ctx)216 void CSRA1_AlignmentWhack ( CSRA1_Alignment * self, ctx_t ctx )
217 {
218 uint32_t i;
219 for ( i = 0; i < align_NUM_COLS; ++ i )
220 {
221 NGS_StringRelease ( self -> col_data [ i ], ctx );
222 self -> col_data [ i ] = NULL;
223 }
224
225 NGS_CursorRelease ( self -> primary_curs, ctx );
226 NGS_CursorRelease ( self -> secondary_curs, ctx );
227
228 NGS_StringRelease ( self -> run_name, ctx );
229 CSRA1_ReadCollectionRelease ( self -> coll, ctx );
230 }
231
232 static
CSRA1_AlignmentInitRegion(CSRA1_Alignment * self,ctx_t ctx,const NGS_Cursor * primary,const NGS_Cursor * secondary,int64_t start,uint64_t count)233 void CSRA1_AlignmentInitRegion ( CSRA1_Alignment * self,
234 ctx_t ctx,
235 const NGS_Cursor * primary,
236 const NGS_Cursor * secondary,
237 int64_t start,
238 uint64_t count )
239 {
240 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcConstructing );
241 /*printf("CSRA1_AlignmentInitRegion(primary=%p, secondary=%p, start=%ld, count=%lu, offset=%lu)\n",
242 (void*)primary, (void*)secondary, start, count, self -> id_offset); */
243
244 /* split the requested region across primary/secondary table, adjust the boundaries as necessary */
245 if ( ! FAILED () )
246 {
247 if ( primary != NULL )
248 {
249 int64_t primary_start;
250 uint64_t primary_count;
251 TRY ( NGS_CursorGetRowRange ( primary, ctx, & primary_start, & primary_count ) )
252 {
253 uint64_t table_end;
254
255 if ( start < primary_start )
256 {
257 count -= ( primary_start - start );
258 start = primary_start;
259 }
260
261 table_end = primary_start + primary_count;
262
263 if ( start < (int64_t)table_end )
264 {
265 self -> cur_row = start;
266 self -> row_max = min ( table_end, (uint64_t)start + min ( count, primary_count ) );
267
268 if ( self -> row_max == table_end )
269 { /* a part of the range is in the secondary; reduce the count by the number or records from primary */
270 count -= (uint64_t) ( (int64_t)self -> row_max - self -> cur_row );
271 start = 1; /* this will be the starting rowId in the secondary */
272 }
273 }
274 else
275 { /* the entire range is beyond the primary cursor;
276 set up so that the first call to Next() will go into the secondary cursor */
277 self -> cur_row = self -> row_max = table_end;
278 start -= self -> id_offset; /* this will be the starting rowId in the secondary */
279 self -> in_primary = false;
280 }
281 }
282 }
283 else
284 { /* primary not requested */
285 if ( start <= (int64_t)self -> id_offset )
286 { /* range overlaps the primary ID space; adjust the range to exclude primary */
287 count -= ( self -> id_offset - start + 1 );
288 start = 1;
289 }
290 else
291 {
292 start -= self -> id_offset; /* this will be the starting rowId in the secondary */
293 }
294 /* make sure the first call to Next() will switch to the secondary cursor */
295 self -> cur_row = self -> row_max = self -> id_offset + 1;
296 self -> in_primary = false;
297 }
298
299 if ( ! FAILED () && secondary != NULL )
300 {
301 int64_t secondary_start;
302 uint64_t secondary_count;
303 TRY ( NGS_CursorGetRowRange ( secondary, ctx, & secondary_start, & secondary_count ) )
304 {
305 uint64_t table_end;
306 if ( start < secondary_start )
307 {
308 count -= ( secondary_start - start );
309 start = secondary_start;
310 }
311
312 table_end = secondary_start + secondary_count;
313
314 if ( start < (int64_t)table_end )
315 {
316 self -> secondary_start = start;
317 self -> secondary_max = min ( table_end, (uint64_t)start + min ( count, secondary_count ) );
318 }
319 else
320 {
321 self -> secondary_start = self -> secondary_max = table_end;
322 }
323 }
324 if ( ! self -> in_primary )
325 {
326 self -> cur_row = self -> secondary_start;
327 self -> row_max = self -> secondary_max;
328 }
329 }
330 /* printf("cur_row=%ld, row_max=%lu, secondary_start=%ld, secondary_max=%lu)\n",
331 self->cur_row, self->row_max, self -> secondary_start, self -> secondary_max); */
332 }
333 }
334
CSRA1_AlignmentGetAlignmentId(CSRA1_Alignment * self,ctx_t ctx)335 NGS_String * CSRA1_AlignmentGetAlignmentId( CSRA1_Alignment* self, ctx_t ctx )
336 {
337 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcAccessing );
338
339 if ( ! self -> seen_first )
340 {
341 USER_ERROR ( xcIteratorUninitialized, "Alignment accessed before a call to AlignmentIteratorNext()" );
342 return NULL;
343 }
344
345 /* if current row is valid, read data */
346 if ( self -> cur_row < self -> row_max )
347 {
348 if ( self -> in_primary )
349 {
350 return NGS_IdMake ( ctx, self -> run_name, NGSObject_PrimaryAlignment, self -> cur_row );
351 }
352 else
353 {
354 return NGS_IdMake ( ctx, self -> run_name, NGSObject_SecondaryAlignment, self -> cur_row + self -> id_offset );
355 }
356 }
357
358 USER_ERROR ( xcCursorExhausted, "No more rows available" );
359 return NULL;
360 }
361
CSRA1_AlignmentGetReferenceSpec(CSRA1_Alignment * self,ctx_t ctx)362 struct NGS_String* CSRA1_AlignmentGetReferenceSpec( CSRA1_Alignment* self, ctx_t ctx )
363 {
364 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
365 if ( ! self -> seen_first )
366 {
367 USER_ERROR ( xcIteratorUninitialized, "Alignment accessed before a call to AlignmentIteratorNext()" );
368 return NULL;
369 }
370
371 return NGS_CursorGetString ( GetCursor ( self ), ctx, self -> cur_row, align_REF_SEQ_ID );
372 }
373
CSRA1_AlignmentGetMappingQuality(CSRA1_Alignment * self,ctx_t ctx)374 int CSRA1_AlignmentGetMappingQuality( CSRA1_Alignment* self, ctx_t ctx )
375 {
376 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
377 if ( ! self -> seen_first )
378 {
379 USER_ERROR ( xcIteratorUninitialized, "Alignment accessed before a call to AlignmentIteratorNext()" );
380 return 0;
381 }
382 return NGS_CursorGetInt32 ( GetCursor ( self ), ctx, self -> cur_row, align_MAPQ );
383 }
384
CSRA1_AlignmentGetReadFilter(CSRA1_Alignment * self,ctx_t ctx)385 INSDC_read_filter CSRA1_AlignmentGetReadFilter( CSRA1_Alignment* self, ctx_t ctx )
386 {
387 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
388 if ( ! self -> seen_first )
389 {
390 USER_ERROR ( xcIteratorUninitialized, "Alignment accessed before a call to AlignmentIteratorNext()" );
391 return 0;
392 }
393 assert ( sizeof ( INSDC_read_filter ) == sizeof ( char ) );
394 return ( uint8_t ) NGS_CursorGetChar ( GetCursor ( self ), ctx, self -> cur_row, align_READ_FILTER );
395 }
396
CSRA1_AlignmentGetReferenceBases(CSRA1_Alignment * self,ctx_t ctx)397 struct NGS_String* CSRA1_AlignmentGetReferenceBases( CSRA1_Alignment* self, ctx_t ctx )
398 {
399 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
400 if ( ! self -> seen_first )
401 {
402 USER_ERROR ( xcIteratorUninitialized, "Alignment accessed before a call to AlignmentIteratorNext()" );
403 return NULL;
404 }
405 return NGS_CursorGetString ( GetCursor ( self ), ctx, self -> cur_row, align_REF_READ );
406 }
407
CSRA1_AlignmentGetReadGroup(CSRA1_Alignment * self,ctx_t ctx)408 struct NGS_String* CSRA1_AlignmentGetReadGroup( CSRA1_Alignment* self, ctx_t ctx )
409 {
410 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
411 if ( ! self -> seen_first )
412 {
413 USER_ERROR ( xcIteratorUninitialized, "Alignment accessed before a call to AlignmentIteratorNext()" );
414 return NULL;
415 }
416 else
417 {
418 TRY ( NGS_String* ret = NGS_CursorGetString ( GetCursor ( self ), ctx, self -> cur_row, align_SPOT_GROUP ) )
419 {
420 return ret;
421 }
422 CATCH_ALL ()
423 {
424 CLEAR();
425 }
426 return NGS_StringMake ( ctx, "", 0 );
427 }
428 }
429
CSRA1_AlignmentGetReadId(CSRA1_Alignment * self,ctx_t ctx)430 NGS_String * CSRA1_AlignmentGetReadId( CSRA1_Alignment* self, ctx_t ctx )
431 {
432 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
433
434 if ( ! self -> seen_first )
435 {
436 USER_ERROR ( xcIteratorUninitialized, "Alignment accessed before a call to AlignmentIteratorNext()" );
437 return 0;
438 }
439
440 {
441 TRY ( int64_t readId = NGS_CursorGetInt64 ( GetCursor ( self ), ctx, self -> cur_row, align_SEQ_SPOT_ID ) )
442 {
443 return NGS_IdMake ( ctx, self -> run_name, NGSObject_Read, readId );
444 }
445 }
446 return NULL;
447 }
448
CSRA1_AlignmentGetClippedFragmentBases(CSRA1_Alignment * self,ctx_t ctx)449 struct NGS_String* CSRA1_AlignmentGetClippedFragmentBases( CSRA1_Alignment* self, ctx_t ctx )
450 {
451 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
452 if ( ! self -> seen_first )
453 {
454 USER_ERROR ( xcIteratorUninitialized, "Alignment accessed before a call to AlignmentIteratorNext()" );
455 return NULL;
456 }
457
458 return NGS_CursorGetString ( GetCursor ( self ), ctx, self -> cur_row, align_CLIPPED_READ );
459 }
460
CSRA1_AlignmentGetClippedFragmentQualities(CSRA1_Alignment * self,ctx_t ctx)461 struct NGS_String* CSRA1_AlignmentGetClippedFragmentQualities( CSRA1_Alignment* self, ctx_t ctx )
462 {
463 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
464 if ( ! self -> seen_first )
465 {
466 USER_ERROR ( xcIteratorUninitialized, "Alignment accessed before a call to AlignmentIteratorNext()" );
467 return NULL;
468 }
469
470 {
471 NGS_String* phred = NGS_CursorGetString ( GetCursor ( self ), ctx, self -> cur_row, align_CLIPPED_QUALITY );
472 /* convert to ascii-33 */
473 size_t size = NGS_StringSize ( phred, ctx );
474 char * copy = malloc ( size + 1 );
475 if ( copy == NULL )
476 {
477 SYSTEM_ERROR ( xcNoMemory,
478 "allocating %u bytes for %s row %ld",
479 size + 1, "CLIPPED_QUALITY", self -> cur_row );
480 NGS_StringRelease ( phred, ctx );
481 return NULL;
482 }
483 else
484 {
485 NGS_String* ret;
486 const char* orig = NGS_StringData ( phred, ctx );
487 size_t i;
488 for ( i = 0; i < size ; ++ i )
489 copy [ i ] = ( char ) ( (uint8_t)(orig [ i ]) + 33 );
490 copy [ i ] = 0;
491
492 ret = NGS_StringMakeOwned ( ctx, copy, size );
493 if ( FAILED () )
494 free ( copy );
495 NGS_StringRelease ( phred, ctx );
496 return ret;
497 }
498 }
499 }
500
CSRA1_AlignmentGetAlignedFragmentBases(CSRA1_Alignment * self,ctx_t ctx)501 struct NGS_String* CSRA1_AlignmentGetAlignedFragmentBases( CSRA1_Alignment* self, ctx_t ctx )
502 {
503 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
504 if ( ! self -> seen_first )
505 {
506 USER_ERROR ( xcIteratorUninitialized, "Alignment accessed before a call to AlignmentIteratorNext()" );
507 return NULL;
508 }
509
510 return NGS_CursorGetString ( GetCursor ( self ), ctx, self -> cur_row, align_READ );
511 }
512
CSRA1_AlignmentIsPrimary(CSRA1_Alignment * self,ctx_t ctx)513 bool CSRA1_AlignmentIsPrimary( CSRA1_Alignment* self, ctx_t ctx )
514 {
515 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
516
517 assert ( self );
518 if ( ! self -> seen_first )
519 {
520 USER_ERROR ( xcIteratorUninitialized, "Alignment accessed before a call to AlignmentIteratorNext()" );
521 return false;
522 }
523
524 return self -> in_primary;
525 }
526
CSRA1_AlignmentGetAlignmentPosition(CSRA1_Alignment * self,ctx_t ctx)527 int64_t CSRA1_AlignmentGetAlignmentPosition( CSRA1_Alignment* self, ctx_t ctx )
528 {
529 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
530 if ( ! self -> seen_first )
531 {
532 USER_ERROR ( xcIteratorUninitialized, "Alignment accessed before a call to AlignmentIteratorNext()" );
533 return 0;
534 }
535
536 return NGS_CursorGetInt32 ( GetCursor ( self ), ctx, self -> cur_row, align_REF_POS);
537 }
538
CSRA1_AlignmentGetReferencePositionProjectionRange(CSRA1_Alignment * self,ctx_t ctx,int64_t ref_pos)539 uint64_t CSRA1_AlignmentGetReferencePositionProjectionRange( CSRA1_Alignment* self, ctx_t ctx, int64_t ref_pos )
540 {
541 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
542 uint64_t ret;
543 bool const* HAS_REF_OFFSET;
544 int32_t const* REF_OFFSET;
545
546 if ( ! self -> seen_first )
547 {
548 USER_ERROR ( xcIteratorUninitialized, "Alignment accessed before a call to AlignmentIteratorNext()" );
549 return (uint64_t)-1;
550 }
551
552 REF_OFFSET = CSRA1_AlignmentGetCellData ( self, ctx, align_REF_OFFSET );
553 /* Check for error, REF_OFFSET == NULL? if ( ) */
554
555 /* if there is no indels just calculate projection as (ref_pos - REF_POS) with len = 1 */
556 if ( self -> cell_len [ align_REF_OFFSET ] == 0 )
557 {
558 int32_t align_len = NGS_CursorGetInt32 ( GetCursor ( self ), ctx, self -> cur_row, align_REF_LEN);
559 ret = ref_pos - NGS_CursorGetInt32 ( GetCursor ( self ), ctx, self -> cur_row, align_REF_POS);
560
561 if ( FAILED() )
562 {
563 SYSTEM_ERROR ( xcIteratorUninitialized, "Failed to access REF_LEN or REF_POS" );
564 return (uint64_t)-1;
565 }
566 else if ( ret >= align_len )
567 {
568 /* calculated projection is out of bounds, i.e. ref_pos
569 doesn't project on the alignment
570 (it also catches ref_pos < align_REF_POS case)
571 */
572 ret = (uint64_t)-1;
573 }
574 else
575 {
576 /* ref_pos has a projection on the current alignment -
577 pack it and make its length = 1
578 */
579 ret <<= 32;
580 ret |= 1;
581 }
582 }
583 else /* we have indels */
584 {
585 int32_t read_len;
586 int32_t idx_ref, idx_HAS_REF_OFFSET = 0, idx_REF_OFFSET = 0;
587 int32_t align_pos;
588 uint32_t proj_len;
589
590 HAS_REF_OFFSET = CSRA1_AlignmentGetCellData ( self, ctx, align_HAS_REF_OFFSET );
591 /* Check for error, HAS_REF_OFFSET == NULL? if ( ) */
592
593 if ( HAS_REF_OFFSET == NULL )
594 {
595 SYSTEM_ERROR ( xcIteratorUninitialized, "Failed to access HAS_REF_OFFSET" );
596 return (uint64_t)-1;
597 }
598
599 read_len = self -> cell_len [ align_HAS_REF_OFFSET ];
600 idx_ref = NGS_CursorGetInt32 ( GetCursor ( self ), ctx, self -> cur_row, align_REF_POS);
601
602 if ( FAILED () )
603 {
604 SYSTEM_ERROR ( xcIteratorUninitialized, "Failed to access REF_POS" );
605 return (uint64_t)-1;
606 }
607
608 if ( idx_ref > ref_pos )
609 {
610 /* the alignment starts beyond given ref_pos
611 out of bounds
612 */
613 ret = (uint64_t)-1;
614 }
615 else
616 {
617 for ( align_pos = 0, proj_len = 1; idx_ref < ref_pos && align_pos < read_len ; align_pos += proj_len )
618 {
619 bool has_ref_offset = HAS_REF_OFFSET [ idx_HAS_REF_OFFSET++ ];
620 if ( has_ref_offset == 0) /* match/mismatch */
621 {
622 ++idx_ref;
623 proj_len = 1;
624 }
625 else /* indel */
626 {
627 int32_t ref_offset = REF_OFFSET [ idx_REF_OFFSET++ ];
628
629 if ( ref_offset < 0 )
630 {
631 /* insertion */
632 proj_len = (uint32_t)-ref_offset;
633 ++idx_ref;
634 }
635 else
636 {
637 /* deletion */
638 assert ( ref_offset > 0 );
639
640 idx_ref += ref_offset;
641 proj_len = 0;
642 }
643 }
644 }
645
646 /* in the case we exited from the loop at the insertion, align_pos points beyond
647 the insertion - it should be restored to point to the beginning of the insertion
648 */
649 if ( proj_len > 1 )
650 align_pos -= proj_len;
651
652 if ( align_pos >= read_len )
653 {
654 align_pos = -1;
655 proj_len = 0;
656 }
657
658 ret = ((uint64_t)align_pos << 32) | proj_len;
659 }
660 }
661
662 return ret;
663 }
664
CSRA1_AlignmentGetAlignmentLength(CSRA1_Alignment * self,ctx_t ctx)665 uint64_t CSRA1_AlignmentGetAlignmentLength( CSRA1_Alignment* self, ctx_t ctx )
666 {
667 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
668 if ( ! self -> seen_first )
669 {
670 USER_ERROR ( xcIteratorUninitialized, "Alignment accessed before a call to AlignmentIteratorNext()" );
671 return 0;
672 }
673
674 return NGS_CursorGetInt32 ( GetCursor ( self ), ctx, self -> cur_row, align_REF_LEN);
675 }
676
CSRA1_AlignmentGetIsReversedOrientation(CSRA1_Alignment * self,ctx_t ctx)677 bool CSRA1_AlignmentGetIsReversedOrientation( CSRA1_Alignment* self, ctx_t ctx )
678 {
679 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
680 if ( ! self -> seen_first )
681 {
682 USER_ERROR ( xcIteratorUninitialized, "Alignment accessed before a call to AlignmentIteratorNext()" );
683 return false;
684 }
685
686 return NGS_CursorGetBool ( GetCursor ( self ), ctx, self -> cur_row, align_REF_ORIENTATION);
687 }
688
CSRA1_AlignmentGetSoftClip(CSRA1_Alignment * self,ctx_t ctx,bool left)689 int CSRA1_AlignmentGetSoftClip( CSRA1_Alignment* self, ctx_t ctx, bool left )
690 {
691 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
692 if ( ! self -> seen_first )
693 {
694 USER_ERROR ( xcIteratorUninitialized, "Alignment accessed before a call to AlignmentIteratorNext()" );
695 return 0;
696 }
697
698 return NGS_CursorGetInt32 ( GetCursor ( self ), ctx, self -> cur_row, left ? align_LEFT_SOFT_CLIP : align_RIGHT_SOFT_CLIP );
699 }
700
CSRA1_AlignmentGetTemplateLength(CSRA1_Alignment * self,ctx_t ctx)701 uint64_t CSRA1_AlignmentGetTemplateLength( CSRA1_Alignment* self, ctx_t ctx )
702 {
703 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
704 if ( ! self -> seen_first )
705 {
706 USER_ERROR ( xcIteratorUninitialized, "Alignment accessed before a call to AlignmentIteratorNext()" );
707 return 0;
708 }
709
710 return NGS_CursorGetInt32 ( GetCursor ( self ), ctx, self -> cur_row, align_TEMPLATE_LEN);
711 }
712
CSRA1_AlignmentGetShortCigar(CSRA1_Alignment * self,ctx_t ctx,bool clipped)713 struct NGS_String* CSRA1_AlignmentGetShortCigar( CSRA1_Alignment* self, ctx_t ctx, bool clipped )
714 {
715 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
716 if ( ! self -> seen_first )
717 {
718 USER_ERROR ( xcIteratorUninitialized, "Alignment accessed before a call to AlignmentIteratorNext()" );
719 return NULL;
720 }
721
722 return NGS_CursorGetString ( GetCursor ( self ), ctx, self -> cur_row, clipped ? align_CLIPPED_CIGAR_SHORT: align_CIGAR_SHORT );
723 }
724
CSRA1_AlignmentGetLongCigar(CSRA1_Alignment * self,ctx_t ctx,bool clipped)725 struct NGS_String* CSRA1_AlignmentGetLongCigar( CSRA1_Alignment* self, ctx_t ctx, bool clipped )
726 {
727 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
728 if ( ! self -> seen_first )
729 {
730 USER_ERROR ( xcIteratorUninitialized, "Alignment accessed before a call to AlignmentIteratorNext()" );
731 return NULL;
732 }
733
734 return NGS_CursorGetString ( GetCursor ( self ), ctx, self -> cur_row, clipped ? align_CLIPPED_CIGAR_LONG : align_CIGAR_LONG );
735 }
736
CSRA1_AlignmentGetRNAOrientation(CSRA1_Alignment * self,ctx_t ctx)737 char CSRA1_AlignmentGetRNAOrientation( CSRA1_Alignment* self, ctx_t ctx )
738 {
739 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
740 if ( ! self -> seen_first )
741 {
742 USER_ERROR ( xcIteratorUninitialized, "Alignment accessed before a call to AlignmentIteratorNext()" );
743 }
744 else
745 {
746 TRY ( char ret = NGS_CursorGetChar ( GetCursor ( self ), ctx, self -> cur_row, align_RNA_ORIENTATION ) )
747 {
748 return ret;
749 }
750 CATCH_ALL ()
751 {
752 CLEAR();
753 }
754 }
755 return '?';
756 }
757
CSRA1_AlignmentHasMate(CSRA1_Alignment * self,ctx_t ctx)758 bool CSRA1_AlignmentHasMate( CSRA1_Alignment* self, ctx_t ctx )
759 {
760 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
761 if ( ! self -> seen_first )
762 {
763 USER_WARNING ( xcIteratorUninitialized, "Alignment accessed before a call to AlignmentIteratorNext()" );
764 return false;
765 }
766
767 TRY ( NGS_CursorGetInt64 ( GetCursor ( self ), ctx, self -> cur_row, align_MATE_ALIGN_ID ) )
768 {
769 int64_t mate_seq_spot_id;
770
771 if ( self -> in_primary )
772 return true;
773
774 TRY ( mate_seq_spot_id = NGS_CursorGetInt64 ( self -> secondary_curs, ctx, self -> cur_row, align_SEQ_SPOT_ID ) )
775 {
776 if ( mate_seq_spot_id > 0 )
777 return true;
778 }
779 }
780
781 CLEAR();
782
783 return false;
784 }
785
CSRA1_AlignmentGetMateAlignmentId(CSRA1_Alignment * self,ctx_t ctx)786 NGS_String * CSRA1_AlignmentGetMateAlignmentId( CSRA1_Alignment* self, ctx_t ctx )
787 {
788 int64_t mateId;
789 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
790
791 if ( ! self -> seen_first )
792 {
793 USER_ERROR ( xcIteratorUninitialized, "Alignment accessed before a call to AlignmentIteratorNext()" );
794 return 0;
795 }
796
797 TRY ( mateId = NGS_CursorGetInt64 ( GetCursor ( self ), ctx, self -> cur_row, align_MATE_ALIGN_ID ) )
798 {
799 if ( ! self -> in_primary )
800 {
801 TRY ( int64_t mate_seq_spot_id = NGS_CursorGetInt64 ( self -> secondary_curs, ctx, mateId, align_SEQ_SPOT_ID ) )
802 {
803 if ( mate_seq_spot_id <= 0 )
804 {
805 INTERNAL_ERROR ( xcSecondaryAlignmentMissingPrimary,
806 "secondary mate alignment id ( %li ) missing primary within %.*s",
807 mateId + self -> id_offset,
808 NGS_StringSize ( self -> run_name, ctx ),
809 NGS_StringData ( self -> run_name, ctx ) );
810 }
811 }
812 }
813
814 if ( ! FAILED () )
815 {
816 return NGS_IdMake ( ctx,
817 self -> run_name,
818 self -> in_primary ? NGSObject_PrimaryAlignment : NGSObject_SecondaryAlignment,
819 self -> in_primary ? mateId : mateId + self -> id_offset );
820 }
821 }
822 return NULL;
823 }
824
CSRA1_AlignmentGetMateReferenceSpec(CSRA1_Alignment * self,ctx_t ctx)825 struct NGS_String* CSRA1_AlignmentGetMateReferenceSpec( CSRA1_Alignment* self, ctx_t ctx )
826 {
827 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
828 if ( ! self -> seen_first )
829 {
830 USER_ERROR ( xcIteratorUninitialized, "Alignment accessed before a call to AlignmentIteratorNext()" );
831 return NULL;
832 }
833
834 {
835 NGS_String* ret;
836 TRY ( ret = NGS_CursorGetString ( GetCursor ( self ), ctx, self -> cur_row, align_MATE_REF_SEQ_ID) )
837 {
838 return ret;
839 }
840 if ( (int)GetRCObject ( ctx -> rc ) == rcColumn && GetRCState ( ctx -> rc ) == rcNotFound )
841 {
842 CLEAR ();
843 return NGS_CursorGetString ( GetCursor ( self ), ctx, self -> cur_row, align_MATE_REF_NAME );
844 }
845 }
846 return NULL;
847 }
848
CSRA1_AlignmentGetMateIsReversedOrientation(CSRA1_Alignment * self,ctx_t ctx)849 bool CSRA1_AlignmentGetMateIsReversedOrientation( CSRA1_Alignment* self, ctx_t ctx )
850 {
851 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
852 if ( ! self -> seen_first )
853 {
854 USER_ERROR ( xcIteratorUninitialized, "Alignment accessed before a call to AlignmentIteratorNext()" );
855 return false;
856 }
857
858 return NGS_CursorGetBool ( GetCursor ( self ), ctx, self -> cur_row, align_MATE_REF_ORIENTATION);
859 }
860
CSRA1_AlignmentIsFirst(CSRA1_Alignment * self,ctx_t ctx)861 bool CSRA1_AlignmentIsFirst( CSRA1_Alignment* self, ctx_t ctx )
862 {
863 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
864 int32_t seq_read_id;
865
866 assert ( self );
867 if ( ! self -> seen_first )
868 {
869 USER_ERROR ( xcIteratorUninitialized, "Alignment accessed before a call to AlignmentIteratorNext()" );
870 return false;
871 }
872
873 TRY ( seq_read_id = NGS_CursorGetInt64 ( GetCursor ( self ), ctx, self -> cur_row, align_SEQ_READ_ID ) )
874 {
875 return seq_read_id == 1;
876 }
877 return false;
878 }
879
880
881 /*--------------------------------------------------------------------------
882 * NGS_AlignmentIterator
883 */
884 static
CSRA1_AlignmentIteratorNext(CSRA1_Alignment * self,ctx_t ctx)885 bool CSRA1_AlignmentIteratorNext ( CSRA1_Alignment* self, ctx_t ctx )
886 {
887 assert ( self != NULL );
888
889 if ( !self -> seen_first )
890 {
891 self -> seen_first = true;
892 }
893 else
894 {
895 ++ self -> cur_row;
896 }
897
898 for ( ; self -> cur_row < self -> row_max; ++ self -> cur_row )
899 {
900 int64_t seq_spot_id;
901
902 if ( self -> in_primary )
903 return true;
904
905 TRY ( seq_spot_id = NGS_CursorGetInt64 ( self -> secondary_curs, ctx, self -> cur_row, align_SEQ_SPOT_ID ) )
906 {
907 if ( seq_spot_id > 0 )
908 return true;
909 }
910
911 CLEAR ();
912 }
913
914 /* see if we need to switch over to the next cursor */
915 if ( self -> in_primary && self -> secondary_curs != NULL )
916 {
917 self -> in_primary = false;
918
919 self -> cur_row = self -> secondary_start;
920 self -> row_max = self -> secondary_max;
921
922 /* let's re-run "next" again to check SEQ_SPOT_ID */
923 self -> seen_first = false;
924 return CSRA1_AlignmentIteratorNext ( self, ctx );
925 }
926
927 return false;
928 }
929
930 static CSRA1_Alignment* CSRA1_AlignmentGetMateAlignment( CSRA1_Alignment* self, ctx_t ctx );
931
932 static
CSRA1_FragmentGetId(CSRA1_Alignment * self,ctx_t ctx)933 NGS_String* CSRA1_FragmentGetId ( CSRA1_Alignment * self, ctx_t ctx )
934 {
935 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
936
937 assert ( self != NULL );
938 if ( ! self -> seen_first )
939 {
940 USER_ERROR ( xcIteratorUninitialized, "Alignment accessed before a call to AlignmentIteratorNext()" );
941 return NULL;
942 }
943
944 {
945 TRY ( int32_t fragId = NGS_CursorGetInt32 ( GetCursor ( self ), ctx, self -> cur_row, align_SEQ_READ_ID ) )
946 {
947 return NGS_IdMakeFragment ( ctx, self -> run_name, true, self -> cur_row, fragId - 1 );
948 }
949 }
950 return NULL;
951 }
952
953 static
CSRA1_FragmentGetSequence(CSRA1_Alignment * self,ctx_t ctx,uint64_t offset,uint64_t length)954 struct NGS_String * CSRA1_FragmentGetSequence ( CSRA1_Alignment * self, ctx_t ctx, uint64_t offset, uint64_t length )
955 {
956 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
957 NGS_String * seq;
958
959 if ( ! self -> seen_first )
960 {
961 USER_ERROR ( xcIteratorUninitialized, "Alignment accessed before a call to AlignmentIteratorNext()" );
962 return NULL;
963 }
964
965 TRY ( seq = NGS_CursorGetString ( GetCursor ( self ), ctx, self -> cur_row, align_RAW_READ ) )
966 {
967 TRY ( NGS_String * sub = NGS_StringSubstrOffsetSize ( seq, ctx, offset, length ) )
968 {
969 NGS_StringRelease ( seq, ctx );
970 seq = sub;
971 }
972 }
973
974 return seq;
975 }
976
977 static
CSRA1_FragmentGetQualities(CSRA1_Alignment * self,ctx_t ctx,uint64_t offset,uint64_t length)978 struct NGS_String * CSRA1_FragmentGetQualities ( CSRA1_Alignment * self, ctx_t ctx, uint64_t offset, uint64_t length )
979 {
980 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
981 NGS_String * ret = NULL;
982
983 if ( ! self -> seen_first )
984 {
985 USER_ERROR ( xcIteratorUninitialized, "Alignment accessed before a call to AlignmentIteratorNext()" );
986 }
987 else
988 {
989 const void * base;
990 uint32_t elem_bits, boff, row_len;
991 TRY ( NGS_CursorCellDataDirect ( GetCursor ( self ), ctx, self -> cur_row, align_QUALITY, & elem_bits, & base, & boff, & row_len ) )
992 {
993 assert ( elem_bits == 8 );
994 assert ( boff == 0 );
995
996 if ( offset > row_len )
997 {
998 length = 0;
999 }
1000 else if ( offset + length > row_len )
1001 {
1002 length = row_len - offset;
1003 }
1004
1005 { /* convert to ascii-33 */
1006 char * copy = malloc ( length + 1 );
1007 if ( copy == NULL )
1008 SYSTEM_ERROR ( xcNoMemory, "allocating %u bytes for QUALITY row %ld", row_len + 1, self -> cur_row );
1009 else
1010 {
1011 uint32_t i;
1012 const uint8_t * orig = base;
1013 for ( i = 0; i < length; ++ i )
1014 {
1015 copy [ i ] = ( char ) ( orig [ offset + i ] + 33 );
1016 }
1017 copy [ length ] = 0;
1018
1019 ret = NGS_StringMakeOwned ( ctx, copy, length );
1020 if ( FAILED () )
1021 {
1022 free ( copy );
1023 }
1024 }
1025 }
1026 }
1027 }
1028
1029 return ret;
1030 }
1031
1032 static
CSRA1_FragmentIsPaired(CSRA1_Alignment * self,ctx_t ctx)1033 bool CSRA1_FragmentIsPaired ( CSRA1_Alignment * self, ctx_t ctx )
1034 {
1035 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
1036
1037 int64_t id;
1038 int32_t idx;
1039 bool ret = false;
1040
1041 if ( ! self -> seen_first )
1042 {
1043 USER_ERROR ( xcIteratorUninitialized, "Alignment accessed before a call to AlignmentIteratorNext()" );
1044 return false;
1045 }
1046
1047 TRY ( id = NGS_CursorGetInt64 ( GetCursor ( self ), ctx, self -> cur_row, align_MATE_ALIGN_ID ) )
1048 {
1049 }
1050 CATCH_ALL ()
1051 {
1052 /* if we failed, it means that MATE_ALIGN_ID column is empty, so lets assign it to 0 and move forward */
1053 CLEAR();
1054 id = 0;
1055 }
1056
1057 /* if MATE_ALIGN_ID != 0, it's paired */
1058 if ( id != 0 )
1059 return true;
1060
1061 TRY ( idx = NGS_CursorGetInt32 ( GetCursor ( self ), ctx, self -> cur_row, align_SEQ_READ_ID ) )
1062 {
1063 /* if SEQ_READ_ID > 1, it's paired. */
1064 if ( idx > 1 )
1065 return true;
1066
1067 TRY ( id = NGS_CursorGetInt64 ( GetCursor ( self ), ctx, self -> cur_row, align_SEQ_SPOT_ID ) )
1068 {
1069 NGS_String * readId;
1070
1071 /* otherwise, have to get spot id and consult SEQUENCE table */
1072 TRY ( readId = NGS_IdMake ( ctx, self -> run_name, NGSObject_Read, id ) )
1073 {
1074 const char * readIdStr = NGS_StringData ( readId, ctx );
1075 TRY ( NGS_Read * read = NGS_ReadCollectionGetRead ( ( NGS_ReadCollection * ) self -> coll, ctx, readIdStr ) )
1076 {
1077 uint32_t numFragments = NGS_ReadNumFragments(read, ctx);
1078 ret = numFragments > 1;
1079
1080 NGS_ReadRelease ( read, ctx );
1081 }
1082
1083 NGS_StringRelease ( readId, ctx );
1084 }
1085 }
1086 }
1087
1088 return ret;
1089 }
1090
1091 static
CSRA1_FragmentIsAligned(CSRA1_Alignment * self,ctx_t ctx)1092 bool CSRA1_FragmentIsAligned ( CSRA1_Alignment * self, ctx_t ctx )
1093 {
1094 assert ( self != NULL );
1095 return true;
1096 }
1097
1098 static
CSRA1_FragmentNext(CSRA1_Alignment * self,ctx_t ctx)1099 bool CSRA1_FragmentNext ( CSRA1_Alignment * self, ctx_t ctx )
1100 {
1101 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
1102 if ( ! self -> seen_first )
1103 {
1104 USER_ERROR ( xcIteratorUninitialized, "Alignment accessed before a call to AlignmentIteratorNext()" );
1105 return false;
1106 }
1107
1108 UNIMPLEMENTED(); /* CSRA1_FragmentNext; should not be called - Alignment is not a FragmentIterator */
1109
1110 return false;
1111 }
1112
1113
1114 static NGS_Alignment_vt CSRA1_Alignment_vt_inst =
1115 {
1116 {
1117 {
1118 /* NGS_Refcount */
1119 CSRA1_AlignmentWhack
1120 },
1121
1122 /* NGS_Fragment */
1123 CSRA1_FragmentGetId,
1124 CSRA1_FragmentGetSequence,
1125 CSRA1_FragmentGetQualities,
1126 CSRA1_FragmentIsPaired,
1127 CSRA1_FragmentIsAligned,
1128 CSRA1_FragmentNext
1129 },
1130
1131 CSRA1_AlignmentGetAlignmentId,
1132 CSRA1_AlignmentGetReferenceSpec,
1133 CSRA1_AlignmentGetMappingQuality,
1134 CSRA1_AlignmentGetReadFilter,
1135 CSRA1_AlignmentGetReferenceBases,
1136 CSRA1_AlignmentGetReadGroup,
1137 CSRA1_AlignmentGetReadId,
1138 CSRA1_AlignmentGetClippedFragmentBases,
1139 CSRA1_AlignmentGetClippedFragmentQualities,
1140 CSRA1_AlignmentGetAlignedFragmentBases,
1141 CSRA1_AlignmentIsPrimary,
1142 CSRA1_AlignmentGetAlignmentPosition,
1143 CSRA1_AlignmentGetReferencePositionProjectionRange,
1144 CSRA1_AlignmentGetAlignmentLength,
1145 CSRA1_AlignmentGetIsReversedOrientation,
1146 CSRA1_AlignmentGetSoftClip,
1147 CSRA1_AlignmentGetTemplateLength,
1148 CSRA1_AlignmentGetShortCigar,
1149 CSRA1_AlignmentGetLongCigar,
1150 CSRA1_AlignmentGetRNAOrientation,
1151 CSRA1_AlignmentHasMate,
1152 CSRA1_AlignmentGetMateAlignmentId,
1153 CSRA1_AlignmentGetMateAlignment,
1154 CSRA1_AlignmentGetMateReferenceSpec,
1155 CSRA1_AlignmentGetMateIsReversedOrientation,
1156 CSRA1_AlignmentIsFirst,
1157
1158 /* Iterator */
1159 CSRA1_AlignmentIteratorNext
1160 };
1161
1162 /* Init
1163 */
1164 static
CSRA1_AlignmentInit(NGS_ALIGNMENT * ref,ctx_t ctx,struct CSRA1_ReadCollection * coll,const char * clsname,const char * instname,const char * run_name,size_t run_name_size,bool exclusive,bool primary,bool secondary,uint64_t id_offset)1165 void CSRA1_AlignmentInit ( NGS_ALIGNMENT* ref,
1166 ctx_t ctx,
1167 struct CSRA1_ReadCollection * coll,
1168 const char *clsname,
1169 const char *instname,
1170 const char * run_name, size_t run_name_size,
1171 bool exclusive,
1172 bool primary,
1173 bool secondary,
1174 uint64_t id_offset )
1175 {
1176 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcConstructing );
1177
1178 if ( ref == NULL )
1179 INTERNAL_ERROR ( xcParamNull, "bad object reference" );
1180 else
1181 {
1182 TRY ( NGS_AlignmentInit ( ctx, ref, & CSRA1_Alignment_vt_inst, clsname, instname ) )
1183 {
1184 if ( primary )
1185 {
1186 ON_FAIL ( ref -> primary_curs = CSRA1_ReadCollectionMakeAlignmentCursor ( coll,
1187 ctx,
1188 true,
1189 exclusive ) )
1190 return;
1191 ref -> in_primary = true;
1192 }
1193
1194 if ( secondary )
1195 {
1196 ON_FAIL ( ref -> secondary_curs = CSRA1_ReadCollectionMakeAlignmentCursor ( coll,
1197 ctx,
1198 false,
1199 exclusive ) )
1200 CLEAR(); /* missing SECONDARY_ALIGNMENTS table is OK */
1201 }
1202
1203 ref -> id_offset = id_offset;
1204
1205 ON_FAIL ( ref -> coll = CSRA1_ReadCollectionDuplicate ( coll, ctx ) )
1206 return;
1207 ON_FAIL ( ref -> run_name = NGS_StringMakeCopy ( ctx, run_name, run_name_size ) )
1208 return;
1209 }
1210 }
1211 }
1212
1213
1214 static
SetRowId(CSRA1_Alignment * self,ctx_t ctx,int64_t rowId,bool primary)1215 void SetRowId ( CSRA1_Alignment* self, ctx_t ctx, int64_t rowId, bool primary )
1216 { /* validate the requested rowId */
1217 if ( rowId <= 0 )
1218 {
1219 INTERNAL_ERROR ( xcCursorAccessFailed,
1220 "rowId ( %li ) out of range for %.*s",
1221 rowId,
1222 NGS_StringSize ( self -> run_name, ctx ),
1223 NGS_StringData ( self -> run_name, ctx ) );
1224 }
1225 else
1226 {
1227 int64_t id = rowId;
1228 int64_t start = 0;
1229 uint64_t count = 0;
1230
1231 if ( primary )
1232 {
1233 if ( self -> primary_curs != NULL )
1234 {
1235 ON_FAIL ( NGS_CursorGetRowRange ( self -> primary_curs, ctx, & start, & count ) )
1236 return;
1237 }
1238 }
1239 else if ( self -> secondary_curs != NULL )
1240 {
1241 ON_FAIL ( NGS_CursorGetRowRange ( self -> secondary_curs, ctx, & start, & count ) )
1242 return;
1243 id -= self -> id_offset;
1244 }
1245
1246 if ( (uint64_t)id >= start + count )
1247 {
1248 INTERNAL_ERROR ( xcCursorAccessFailed,
1249 "rowId ( %li ) out of range for %.*s",
1250 rowId,
1251 NGS_StringSize ( self -> run_name, ctx ),
1252 NGS_StringData ( self -> run_name, ctx ) );
1253 }
1254 else
1255 {
1256 if ( ! primary && self -> secondary_curs != NULL )
1257 {
1258 TRY ( int64_t spot_id = NGS_CursorGetInt64 ( self -> secondary_curs, ctx, id, align_SEQ_SPOT_ID ) )
1259 {
1260 if ( spot_id <= 0 )
1261 {
1262 INTERNAL_ERROR ( xcSecondaryAlignmentMissingPrimary,
1263 "secondary alignment id ( %li ) missing primary within %.*s",
1264 rowId,
1265 NGS_StringSize ( self -> run_name, ctx ),
1266 NGS_StringData ( self -> run_name, ctx ) );
1267 }
1268 }
1269 }
1270
1271 if ( ! FAILED () )
1272 {
1273 self -> cur_row = id;
1274 self -> row_max = id + 1;
1275 }
1276 }
1277 }
1278 }
1279
1280 /* Make
1281 * makes a common alignment from VCursor
1282 */
CSRA1_AlignmentMake(ctx_t ctx,struct CSRA1_ReadCollection * coll,int64_t alignId,char const * run_name,size_t run_name_size,bool primary,uint64_t id_offset)1283 NGS_Alignment * CSRA1_AlignmentMake ( ctx_t ctx,
1284 struct CSRA1_ReadCollection * coll,
1285 int64_t alignId,
1286 char const* run_name, size_t run_name_size,
1287 bool primary,
1288 uint64_t id_offset )
1289 {
1290 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcConstructing );
1291
1292 CSRA1_Alignment * ref;
1293
1294 ref = calloc ( 1, sizeof * ref );
1295 if ( ref == NULL )
1296 SYSTEM_ERROR ( xcNoMemory,
1297 "allocating CSRA1_Alignment(%lu) on '%.*s'",
1298 alignId,
1299 run_name_size,
1300 run_name );
1301 else
1302 {
1303 #if _DEBUGGING
1304 char instname [ 256 ];
1305 string_printf ( instname,
1306 sizeof instname,
1307 NULL,
1308 "%.*s(%lu)",
1309 run_name_size,
1310 run_name,
1311 alignId );
1312 instname [ sizeof instname - 1 ] = 0;
1313 #else
1314 const char *instname = "";
1315 #endif
1316 TRY ( CSRA1_AlignmentInit ( ref, ctx, coll, "CSRA1_Alignment", instname, run_name, run_name_size, false, primary, ! primary, id_offset ) )
1317 {
1318 TRY ( SetRowId( ref, ctx, alignId, primary ) )
1319 {
1320 ref -> seen_first = true;
1321 return ( NGS_Alignment * ) ref;
1322 }
1323 CSRA1_AlignmentWhack ( ref, ctx );
1324 }
1325 free ( ref );
1326 }
1327
1328 return NULL;
1329 }
1330
CSRA1_AlignmentGetMateAlignment(CSRA1_Alignment * self,ctx_t ctx)1331 CSRA1_Alignment* CSRA1_AlignmentGetMateAlignment( CSRA1_Alignment* self, ctx_t ctx )
1332 {
1333 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
1334 TRY ( int64_t mate_row = NGS_CursorGetInt64 ( GetCursor ( self ), ctx, self -> cur_row, align_MATE_ALIGN_ID ) )
1335 {
1336 if ( ! self -> in_primary )
1337 {
1338 mate_row += self -> id_offset;
1339 }
1340
1341 {
1342 TRY ( NGS_String * mate_id = NGS_IdMake( ctx,
1343 self -> run_name,
1344 self -> in_primary ? NGSObject_PrimaryAlignment : NGSObject_SecondaryAlignment,
1345 mate_row ) )
1346 {
1347 CSRA1_Alignment* ret = (CSRA1_Alignment*)
1348 NGS_ReadCollectionGetAlignment ( CSRA1_ReadCollectionToNGS_ReadCollection ( self -> coll, ctx ),
1349 ctx,
1350 NGS_StringData ( mate_id, ctx ) );
1351 NGS_StringRelease (mate_id, ctx );
1352 return ret;
1353 }
1354 }
1355 }
1356 return NULL;
1357 }
1358
CSRA1_AlignmentIteratorMake(ctx_t ctx,struct CSRA1_ReadCollection * coll,bool primary,bool secondary,const NGS_String * run_name,uint64_t id_offset)1359 NGS_Alignment * CSRA1_AlignmentIteratorMake ( ctx_t ctx,
1360 struct CSRA1_ReadCollection * coll,
1361 bool primary,
1362 bool secondary,
1363 const NGS_String * run_name,
1364 uint64_t id_offset )
1365 {
1366 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcConstructing );
1367
1368 CSRA1_Alignment * ref;
1369
1370 ref = calloc ( 1, sizeof * ref );
1371 if ( ref == NULL )
1372 SYSTEM_ERROR ( xcNoMemory,
1373 "allocating NGS_AlignmentIterator on '%.*s'",
1374 NGS_StringSize ( run_name, ctx ),
1375 NGS_StringData ( run_name, ctx ) );
1376 else
1377 {
1378 #if _DEBUGGING
1379 char instname [ 256 ];
1380 string_printf ( instname,
1381 sizeof instname,
1382 NULL,
1383 "%.*s",
1384 NGS_StringSize ( run_name, ctx ),
1385 NGS_StringData ( run_name, ctx ) );
1386 instname [ sizeof instname - 1 ] = 0;
1387 #else
1388 const char *instname = "";
1389 #endif
1390 TRY ( CSRA1_AlignmentInit ( ref, ctx, coll, "NGS_AlignmentIterator", instname, NGS_StringData (run_name, ctx), NGS_StringSize (run_name, ctx), true, primary, secondary, id_offset ) )
1391 {
1392 TRY ( CSRA1_AlignmentInitRegion ( ref, ctx, ref -> primary_curs, ref -> secondary_curs, 0, ULLONG_MAX ) )
1393 {
1394 return ( NGS_Alignment * ) ref;
1395 }
1396 CSRA1_AlignmentWhack ( ref, ctx );
1397 }
1398
1399 free ( ref );
1400 }
1401
1402 return NULL;
1403 }
1404
CSRA1_AlignmentRangeMake(ctx_t ctx,struct CSRA1_ReadCollection * coll,bool primary,bool secondary,const NGS_String * run_name,uint64_t id_offset,int64_t first,uint64_t count)1405 NGS_Alignment * CSRA1_AlignmentRangeMake ( ctx_t ctx,
1406 struct CSRA1_ReadCollection * coll,
1407 bool primary,
1408 bool secondary,
1409 const NGS_String * run_name,
1410 uint64_t id_offset,
1411 int64_t first,
1412 uint64_t count)
1413 {
1414 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcConstructing );
1415
1416 CSRA1_Alignment * ref;
1417
1418 ref = calloc ( 1, sizeof * ref );
1419 if ( ref == NULL )
1420 SYSTEM_ERROR ( xcNoMemory,
1421 "allocating NGS_AlignmentRange on '%.*s'",
1422 NGS_StringSize ( run_name, ctx ),
1423 NGS_StringData ( run_name, ctx ) );
1424 else
1425 {
1426 #if _DEBUGGING
1427 char instname [ 256 ];
1428 string_printf ( instname,
1429 sizeof instname,
1430 NULL,
1431 "%.*s",
1432 NGS_StringSize ( run_name, ctx ),
1433 NGS_StringData ( run_name, ctx ) );
1434 instname [ sizeof instname - 1 ] = 0;
1435 #else
1436 const char *instname = "";
1437 #endif
1438 TRY ( CSRA1_AlignmentInit ( ref, ctx, coll, "NGS_AlignmentRange", instname, NGS_StringData( run_name, ctx ), NGS_StringSize( run_name, ctx ), true, primary, secondary, id_offset ) )
1439 {
1440 TRY ( CSRA1_AlignmentInitRegion ( ref, ctx, ref -> primary_curs, ref -> secondary_curs, first, count ) )
1441 {
1442 return ( NGS_Alignment * ) ref;
1443 }
1444 CSRA1_AlignmentWhack ( ref, ctx );
1445 }
1446
1447 free ( ref );
1448 }
1449
1450 return NULL;
1451 }
1452
1453