1 /*===========================================================================
2 *
3 *                            PUBLIC DOMAIN NOTICE
4 *               National Center for Biotechnology Information
5 *
6 *  This software/database is a "United States Government Work" under the
7 *  terms of the United States Copyright Act.  It was written as part of
8 *  the author's official duties as a United States Government employee and
9 *  thus cannot be copyrighted.  This software/database is freely available
10 *  to the public for use. The National Library of Medicine and the U.S.
11 *  Government have not placed any restriction on its use or reproduction.
12 *
13 *  Although all reasonable efforts have been taken to ensure the accuracy
14 *  and reliability of the software and data, the NLM and the U.S.
15 *  Government do not and cannot warrant the performance or results that
16 *  may be obtained by using this software or data. The NLM and the U.S.
17 *  Government disclaim all warranties, express or implied, including
18 *  warranties of performance, merchantability or fitness for any particular
19 *  purpose.
20 *
21 *  Please cite the author in any work or product based on this material.
22 *
23 * ===========================================================================
24 *
25 */
26 
27 #include "CSRA1_Alignment.h"
28 
29 typedef struct CSRA1_Alignment CSRA1_Alignment;
30 #define NGS_ALIGNMENT CSRA1_Alignment
31 
32 #include "NGS_Alignment.h"
33 #include "NGS_ReadCollection.h"
34 #include "NGS_Refcount.h"
35 #include "NGS_Read.h"
36 
37 #include "NGS_Id.h"
38 #include "NGS_String.h"
39 #include "NGS_Cursor.h"
40 
41 #include "CSRA1_ReadCollection.h"
42 
43 #include <sysalloc.h>
44 
45 #include <vdb/cursor.h>
46 #include <vdb/vdb-priv.h>
47 
48 #include <klib/printf.h>
49 #include <klib/rc.h>
50 
51 #include <kfc/ctx.h>
52 #include <kfc/rsrc.h>
53 #include <kfc/except.h>
54 #include <kfc/xc.h>
55 
56 #include <limits.h>
57 
58 #ifndef min
59 #   define min(a,b) ( (a) < (b) ? (a) : (b) )
60 #endif
61 
62 /*--------------------------------------------------------------------------
63  * CSRA1_Alignment
64  */
65 
66 /* align_col_specs must be kept in sync with enum AlignmentTableColumns */
67 static const char * align_col_specs [] =
68 {
69     "(I32)MAPQ",
70     "(INSDC:SRA:read_filter)READ_FILTER",
71     "(ascii)CIGAR_LONG",
72     "(ascii)CIGAR_SHORT",
73     "(ascii)CLIPPED_CIGAR_LONG",
74     "(ascii)CLIPPED_CIGAR_SHORT",
75     "(INSDC:quality:phred)CLIPPED_QUALITY",
76     "(INSDC:dna:text)CLIPPED_READ",
77     "(INSDC:coord:len)LEFT_SOFT_CLIP",
78     "(INSDC:coord:len)RIGHT_SOFT_CLIP",
79     "(INSDC:quality:phred)QUALITY",
80     "(INSDC:dna:text)RAW_READ",
81     "(INSDC:dna:text)READ",
82     "(I64)REF_ID",
83     "(INSDC:coord:len)REF_LEN",
84     "(ascii)REF_SEQ_ID",	/* was REF_NAME changed March 23 2015 */
85     "(bool)REF_ORIENTATION",
86     "(INSDC:coord:zero)REF_POS",
87     "(INSDC:dna:text)REF_READ",
88     "(INSDC:coord:one)SEQ_READ_ID",
89     "(I64)SEQ_SPOT_ID",
90     "(ascii)SPOT_GROUP",
91     "(I32)TEMPLATE_LEN",
92     "(ascii)RNA_ORIENTATION",
93     "(I64)MATE_ALIGN_ID",
94     "(ascii)MATE_REF_SEQ_ID",
95     "(ascii)MATE_REF_NAME", /* to be used if MATE_REF_SEQ_ID is absent */
96     "(bool)MATE_REF_ORIENTATION",
97     "(bool)HAS_REF_OFFSET",
98     "(I32)REF_OFFSET"
99 };
100 /* Made changes to align_col_specs? - Make the same in enum AlignmentTableColumns! */
101 
102 /* enum AlignmentTableColumns must be kept in sync with align_col_specs */
103 enum AlignmentTableColumns
104 {
105     align_MAPQ,
106     align_READ_FILTER,
107     align_CIGAR_LONG,
108     align_CIGAR_SHORT,
109     align_CLIPPED_CIGAR_LONG,
110     align_CLIPPED_CIGAR_SHORT,
111     align_CLIPPED_QUALITY,
112     align_CLIPPED_READ,
113     align_LEFT_SOFT_CLIP,
114     align_RIGHT_SOFT_CLIP,
115     align_QUALITY,
116     align_RAW_READ,
117     align_READ,
118     align_REF_ID,
119     align_REF_LEN,
120     align_REF_SEQ_ID,
121     align_REF_ORIENTATION,
122     align_REF_POS,
123     align_REF_READ,
124     align_SEQ_READ_ID,
125     align_SEQ_SPOT_ID,
126     align_SPOT_GROUP,
127     align_TEMPLATE_LEN,
128     align_RNA_ORIENTATION,
129     align_MATE_ALIGN_ID,
130     align_MATE_REF_SEQ_ID,
131     align_MATE_REF_NAME,
132     align_MATE_REF_ORIENTATION,
133     align_HAS_REF_OFFSET,
134     align_REF_OFFSET,
135 
136     align_NUM_COLS
137 };
138 /* Made changes to enum AlignmentTableColumns? - Make the same in align_col_specs! */
139 
140 
CSRA1_AlignmentMakeDb(ctx_t ctx,struct VDatabase const * db,struct NGS_String const * run_name,char const * table_name)141 struct NGS_Cursor const* CSRA1_AlignmentMakeDb ( ctx_t ctx,
142                                                  struct VDatabase const* db,
143                                                  struct NGS_String const* run_name,
144                                                  char const* table_name )
145 {
146     return NGS_CursorMakeDb ( ctx, db, run_name, table_name, align_col_specs, align_NUM_COLS );
147 }
148 
149 struct CSRA1_Alignment
150 {
151     NGS_Refcount dad;
152     struct CSRA1_ReadCollection * coll;
153     const NGS_String * run_name;
154 
155     int64_t cur_row;
156     int64_t row_max;
157 
158     const NGS_Cursor * primary_curs;
159     const NGS_Cursor * secondary_curs;
160     NGS_String * col_data [ align_NUM_COLS ];
161 
162 	uint64_t id_offset;
163 
164     bool seen_first;
165 	bool in_primary;
166 
167     /* for use in slices */
168 	int64_t secondary_start;
169 	int64_t secondary_max;
170 
171     /* data to be accessed via CellData */
172     void const* cell_data [ align_NUM_COLS ];
173     uint32_t cell_len [ align_NUM_COLS ];
174 };
175 
CSRA1_AlignmentGetCellData(CSRA1_Alignment * self,ctx_t ctx,uint32_t col_idx)176 static void const* CSRA1_AlignmentGetCellData ( CSRA1_Alignment * self,
177                                                 ctx_t ctx,
178                                                 uint32_t col_idx
179                                                 )
180 {
181     if ( self -> cell_data [ col_idx ] == NULL )
182     {
183         assert ( self -> cell_len [ col_idx ] == 0 );
184 
185         if ( ! self -> seen_first )
186         {
187             USER_ERROR ( xcIteratorUninitialized, "Alignment accessed before a call to AlignmentIteratorNext()" );
188             return NULL;
189         }
190 
191         NGS_CursorCellDataDirect ( self -> in_primary ? self->primary_curs : self->secondary_curs,
192             ctx,
193             self->cur_row,
194             col_idx,
195             NULL,
196             & self -> cell_data [ col_idx ],
197             NULL,
198             & self -> cell_len [ col_idx ]
199         );
200 
201         if ( FAILED() )
202         {
203             self -> cell_data [ col_idx ] = NULL;
204             self -> cell_len [ col_idx ] = 0;
205         }
206     }
207 
208     return self -> cell_data [ col_idx ];
209 }
210 
211 #define GetCursor( self ) ( self -> in_primary ? self -> primary_curs : self -> secondary_curs )
212 
213 /* Whack
214  */
215 static
CSRA1_AlignmentWhack(CSRA1_Alignment * self,ctx_t ctx)216 void CSRA1_AlignmentWhack ( CSRA1_Alignment * self, ctx_t ctx )
217 {
218     uint32_t i;
219     for ( i = 0; i < align_NUM_COLS; ++ i )
220     {
221         NGS_StringRelease ( self -> col_data [ i ], ctx );
222         self -> col_data [ i ] = NULL;
223     }
224 
225     NGS_CursorRelease ( self -> primary_curs, ctx );
226     NGS_CursorRelease ( self -> secondary_curs, ctx );
227 
228     NGS_StringRelease ( self -> run_name, ctx );
229     CSRA1_ReadCollectionRelease ( self -> coll, ctx );
230 }
231 
232 static
CSRA1_AlignmentInitRegion(CSRA1_Alignment * self,ctx_t ctx,const NGS_Cursor * primary,const NGS_Cursor * secondary,int64_t start,uint64_t count)233 void CSRA1_AlignmentInitRegion ( CSRA1_Alignment * self,
234                                  ctx_t ctx,
235                                  const NGS_Cursor * primary,
236                                  const NGS_Cursor * secondary,
237                                  int64_t start,
238                                  uint64_t count )
239 {
240     FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcConstructing );
241 /*printf("CSRA1_AlignmentInitRegion(primary=%p, secondary=%p, start=%ld, count=%lu, offset=%lu)\n",
242         (void*)primary, (void*)secondary, start, count, self -> id_offset);    */
243 
244     /* split the requested region across primary/secondary table, adjust the boundaries as necessary */
245     if ( ! FAILED () )
246     {
247         if ( primary != NULL )
248         {
249             int64_t primary_start;
250             uint64_t primary_count;
251             TRY ( NGS_CursorGetRowRange ( primary, ctx, & primary_start, & primary_count ) )
252             {
253                 uint64_t table_end;
254 
255                 if ( start < primary_start )
256                 {
257                     count -= ( primary_start - start );
258                     start = primary_start;
259                 }
260 
261                 table_end = primary_start + primary_count;
262 
263                 if ( start < (int64_t)table_end )
264                 {
265                     self -> cur_row = start;
266                     self -> row_max = min ( table_end, (uint64_t)start + min ( count, primary_count ) );
267 
268                     if ( self -> row_max == table_end )
269                     {   /* a part of the range is in the secondary; reduce the count by the number or records from primary */
270                         count -= (uint64_t) ( (int64_t)self -> row_max - self -> cur_row );
271                         start = 1; /* this will be the starting rowId in the secondary */
272                     }
273                 }
274                 else
275                 {   /* the entire range is beyond the primary cursor;
276                        set up so that the first call to Next() will go into the secondary cursor */
277                     self -> cur_row = self -> row_max = table_end;
278                     start -= self -> id_offset; /* this will be the starting rowId in the secondary */
279                     self -> in_primary = false;
280                 }
281             }
282         }
283         else
284         {   /* primary not requested */
285             if ( start <= (int64_t)self -> id_offset )
286             {   /* range overlaps the primary ID space; adjust the range to exclude primary */
287                 count -= ( self -> id_offset - start + 1 );
288                 start = 1;
289             }
290             else
291             {
292                 start -= self -> id_offset; /* this will be the starting rowId in the secondary */
293             }
294             /* make sure the first call to Next() will switch to the secondary cursor */
295             self -> cur_row = self -> row_max = self -> id_offset + 1;
296             self -> in_primary = false;
297         }
298 
299         if ( ! FAILED () && secondary != NULL )
300         {
301             int64_t  secondary_start;
302             uint64_t secondary_count;
303             TRY ( NGS_CursorGetRowRange ( secondary, ctx, & secondary_start, & secondary_count ) )
304             {
305                 uint64_t table_end;
306                 if ( start < secondary_start )
307                 {
308                     count -= ( secondary_start - start );
309                     start = secondary_start;
310                 }
311 
312                 table_end = secondary_start + secondary_count;
313 
314                 if ( start < (int64_t)table_end )
315                 {
316                     self -> secondary_start = start;
317                     self -> secondary_max = min ( table_end, (uint64_t)start + min ( count, secondary_count ) );
318                 }
319                 else
320                 {
321                     self -> secondary_start = self -> secondary_max = table_end;
322                 }
323             }
324             if ( ! self -> in_primary )
325             {
326                 self -> cur_row = self -> secondary_start;
327                 self -> row_max = self -> secondary_max;
328             }
329         }
330 /*    printf("cur_row=%ld, row_max=%lu, secondary_start=%ld, secondary_max=%lu)\n",
331             self->cur_row, self->row_max, self -> secondary_start, self -> secondary_max);    */
332     }
333 }
334 
CSRA1_AlignmentGetAlignmentId(CSRA1_Alignment * self,ctx_t ctx)335 NGS_String * CSRA1_AlignmentGetAlignmentId( CSRA1_Alignment* self, ctx_t ctx )
336 {
337     FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcAccessing );
338 
339     if ( ! self -> seen_first )
340     {
341         USER_ERROR ( xcIteratorUninitialized, "Alignment accessed before a call to AlignmentIteratorNext()" );
342         return NULL;
343     }
344 
345     /* if current row is valid, read data */
346     if ( self -> cur_row < self -> row_max )
347     {
348         if ( self -> in_primary )
349         {
350             return NGS_IdMake ( ctx, self -> run_name, NGSObject_PrimaryAlignment, self -> cur_row );
351         }
352         else
353         {
354             return NGS_IdMake ( ctx, self -> run_name, NGSObject_SecondaryAlignment, self -> cur_row + self -> id_offset );
355         }
356     }
357 
358     USER_ERROR ( xcCursorExhausted, "No more rows available" );
359     return NULL;
360 }
361 
CSRA1_AlignmentGetReferenceSpec(CSRA1_Alignment * self,ctx_t ctx)362 struct NGS_String* CSRA1_AlignmentGetReferenceSpec( CSRA1_Alignment* self, ctx_t ctx )
363 {
364     FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
365     if ( ! self -> seen_first )
366     {
367         USER_ERROR ( xcIteratorUninitialized, "Alignment accessed before a call to AlignmentIteratorNext()" );
368         return NULL;
369     }
370 
371     return NGS_CursorGetString ( GetCursor ( self ), ctx, self -> cur_row, align_REF_SEQ_ID );
372 }
373 
CSRA1_AlignmentGetMappingQuality(CSRA1_Alignment * self,ctx_t ctx)374 int CSRA1_AlignmentGetMappingQuality( CSRA1_Alignment* self, ctx_t ctx )
375 {
376     FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
377     if ( ! self -> seen_first )
378     {
379         USER_ERROR ( xcIteratorUninitialized, "Alignment accessed before a call to AlignmentIteratorNext()" );
380         return 0;
381     }
382     return NGS_CursorGetInt32 ( GetCursor ( self ), ctx, self -> cur_row, align_MAPQ );
383 }
384 
CSRA1_AlignmentGetReadFilter(CSRA1_Alignment * self,ctx_t ctx)385 INSDC_read_filter CSRA1_AlignmentGetReadFilter( CSRA1_Alignment* self, ctx_t ctx )
386 {
387     FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
388     if ( ! self -> seen_first )
389     {
390         USER_ERROR ( xcIteratorUninitialized, "Alignment accessed before a call to AlignmentIteratorNext()" );
391         return 0;
392     }
393     assert ( sizeof ( INSDC_read_filter ) == sizeof ( char ) );
394     return ( uint8_t ) NGS_CursorGetChar ( GetCursor ( self ), ctx, self -> cur_row, align_READ_FILTER );
395 }
396 
CSRA1_AlignmentGetReferenceBases(CSRA1_Alignment * self,ctx_t ctx)397 struct NGS_String* CSRA1_AlignmentGetReferenceBases( CSRA1_Alignment* self, ctx_t ctx )
398 {
399     FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
400     if ( ! self -> seen_first )
401     {
402         USER_ERROR ( xcIteratorUninitialized, "Alignment accessed before a call to AlignmentIteratorNext()" );
403         return NULL;
404     }
405     return NGS_CursorGetString ( GetCursor ( self ), ctx, self -> cur_row, align_REF_READ );
406 }
407 
CSRA1_AlignmentGetReadGroup(CSRA1_Alignment * self,ctx_t ctx)408 struct NGS_String* CSRA1_AlignmentGetReadGroup( CSRA1_Alignment* self, ctx_t ctx )
409 {
410     FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
411     if ( ! self -> seen_first )
412     {
413         USER_ERROR ( xcIteratorUninitialized, "Alignment accessed before a call to AlignmentIteratorNext()" );
414         return NULL;
415     }
416     else
417     {
418         TRY ( NGS_String* ret = NGS_CursorGetString ( GetCursor ( self ), ctx, self -> cur_row, align_SPOT_GROUP ) )
419         {
420             return ret;
421         }
422         CATCH_ALL ()
423         {
424             CLEAR();
425         }
426         return NGS_StringMake ( ctx, "", 0 );
427     }
428 }
429 
CSRA1_AlignmentGetReadId(CSRA1_Alignment * self,ctx_t ctx)430 NGS_String * CSRA1_AlignmentGetReadId( CSRA1_Alignment* self, ctx_t ctx )
431 {
432     FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
433 
434     if ( ! self -> seen_first )
435     {
436         USER_ERROR ( xcIteratorUninitialized, "Alignment accessed before a call to AlignmentIteratorNext()" );
437         return 0;
438     }
439 
440     {
441         TRY ( int64_t readId = NGS_CursorGetInt64 ( GetCursor ( self ), ctx, self -> cur_row, align_SEQ_SPOT_ID ) )
442         {
443             return NGS_IdMake ( ctx, self -> run_name, NGSObject_Read, readId );
444         }
445     }
446     return NULL;
447 }
448 
CSRA1_AlignmentGetClippedFragmentBases(CSRA1_Alignment * self,ctx_t ctx)449 struct NGS_String* CSRA1_AlignmentGetClippedFragmentBases( CSRA1_Alignment* self, ctx_t ctx )
450 {
451     FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
452     if ( ! self -> seen_first )
453     {
454         USER_ERROR ( xcIteratorUninitialized, "Alignment accessed before a call to AlignmentIteratorNext()" );
455         return NULL;
456     }
457 
458     return NGS_CursorGetString ( GetCursor ( self ), ctx, self -> cur_row, align_CLIPPED_READ );
459 }
460 
CSRA1_AlignmentGetClippedFragmentQualities(CSRA1_Alignment * self,ctx_t ctx)461 struct NGS_String* CSRA1_AlignmentGetClippedFragmentQualities( CSRA1_Alignment* self, ctx_t ctx )
462 {
463     FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
464     if ( ! self -> seen_first )
465     {
466         USER_ERROR ( xcIteratorUninitialized, "Alignment accessed before a call to AlignmentIteratorNext()" );
467         return NULL;
468     }
469 
470     {
471         NGS_String* phred = NGS_CursorGetString ( GetCursor ( self ), ctx, self -> cur_row, align_CLIPPED_QUALITY );
472         /* convert to ascii-33 */
473         size_t size = NGS_StringSize ( phred, ctx );
474         char * copy = malloc ( size + 1 );
475         if ( copy == NULL )
476         {
477             SYSTEM_ERROR ( xcNoMemory,
478                            "allocating %u bytes for %s row %ld",
479                            size + 1, "CLIPPED_QUALITY", self -> cur_row );
480             NGS_StringRelease ( phred, ctx );
481             return NULL;
482         }
483         else
484         {
485             NGS_String* ret;
486             const char* orig = NGS_StringData ( phred, ctx );
487             size_t i;
488             for ( i = 0; i < size ; ++ i )
489                 copy [ i ] = ( char ) ( (uint8_t)(orig [ i ]) + 33 );
490             copy [ i ] = 0;
491 
492             ret = NGS_StringMakeOwned ( ctx, copy, size );
493             if ( FAILED () )
494                 free ( copy );
495             NGS_StringRelease ( phred, ctx );
496             return ret;
497         }
498     }
499 }
500 
CSRA1_AlignmentGetAlignedFragmentBases(CSRA1_Alignment * self,ctx_t ctx)501 struct NGS_String* CSRA1_AlignmentGetAlignedFragmentBases( CSRA1_Alignment* self, ctx_t ctx )
502 {
503     FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
504     if ( ! self -> seen_first )
505     {
506         USER_ERROR ( xcIteratorUninitialized, "Alignment accessed before a call to AlignmentIteratorNext()" );
507         return NULL;
508     }
509 
510     return NGS_CursorGetString ( GetCursor ( self ), ctx, self -> cur_row, align_READ );
511 }
512 
CSRA1_AlignmentIsPrimary(CSRA1_Alignment * self,ctx_t ctx)513 bool CSRA1_AlignmentIsPrimary( CSRA1_Alignment* self, ctx_t ctx )
514 {
515     FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
516 
517     assert ( self );
518     if ( ! self -> seen_first )
519     {
520         USER_ERROR ( xcIteratorUninitialized, "Alignment accessed before a call to AlignmentIteratorNext()" );
521         return false;
522     }
523 
524     return self -> in_primary;
525 }
526 
CSRA1_AlignmentGetAlignmentPosition(CSRA1_Alignment * self,ctx_t ctx)527 int64_t CSRA1_AlignmentGetAlignmentPosition( CSRA1_Alignment* self, ctx_t ctx )
528 {
529     FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
530     if ( ! self -> seen_first )
531     {
532         USER_ERROR ( xcIteratorUninitialized, "Alignment accessed before a call to AlignmentIteratorNext()" );
533         return 0;
534     }
535 
536     return NGS_CursorGetInt32 ( GetCursor ( self ), ctx, self -> cur_row, align_REF_POS);
537 }
538 
CSRA1_AlignmentGetReferencePositionProjectionRange(CSRA1_Alignment * self,ctx_t ctx,int64_t ref_pos)539 uint64_t CSRA1_AlignmentGetReferencePositionProjectionRange( CSRA1_Alignment* self, ctx_t ctx, int64_t ref_pos )
540 {
541     FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
542     uint64_t ret;
543     bool const* HAS_REF_OFFSET;
544     int32_t const* REF_OFFSET;
545 
546     if ( ! self -> seen_first )
547     {
548         USER_ERROR ( xcIteratorUninitialized, "Alignment accessed before a call to AlignmentIteratorNext()" );
549         return (uint64_t)-1;
550     }
551 
552     REF_OFFSET = CSRA1_AlignmentGetCellData ( self, ctx, align_REF_OFFSET );
553     /* Check for error, REF_OFFSET == NULL? if (  ) */
554 
555     /* if there is no indels just calculate projection as (ref_pos - REF_POS) with len = 1 */
556     if ( self -> cell_len [ align_REF_OFFSET ] == 0 )
557     {
558         int32_t align_len = NGS_CursorGetInt32 ( GetCursor ( self ), ctx, self -> cur_row, align_REF_LEN);
559         ret = ref_pos - NGS_CursorGetInt32 ( GetCursor ( self ), ctx, self -> cur_row, align_REF_POS);
560 
561         if ( FAILED() )
562         {
563             SYSTEM_ERROR ( xcIteratorUninitialized, "Failed to access REF_LEN or REF_POS" );
564             return (uint64_t)-1;
565         }
566         else if ( ret >= align_len )
567         {
568             /* calculated projection is out of bounds, i.e. ref_pos
569                doesn't project on the alignment
570                (it also catches ref_pos < align_REF_POS case)
571             */
572             ret = (uint64_t)-1;
573         }
574         else
575         {
576             /* ref_pos has a projection on the current alignment -
577                pack it and make its length = 1
578             */
579             ret <<= 32;
580             ret |= 1;
581         }
582     }
583     else /* we have indels */
584     {
585         int32_t read_len;
586         int32_t idx_ref, idx_HAS_REF_OFFSET = 0, idx_REF_OFFSET = 0;
587         int32_t align_pos;
588         uint32_t proj_len;
589 
590         HAS_REF_OFFSET = CSRA1_AlignmentGetCellData ( self, ctx, align_HAS_REF_OFFSET );
591         /* Check for error, HAS_REF_OFFSET == NULL? if (  ) */
592 
593         if ( HAS_REF_OFFSET == NULL )
594         {
595             SYSTEM_ERROR ( xcIteratorUninitialized, "Failed to access HAS_REF_OFFSET" );
596             return (uint64_t)-1;
597         }
598 
599         read_len = self -> cell_len [ align_HAS_REF_OFFSET ];
600         idx_ref = NGS_CursorGetInt32 ( GetCursor ( self ), ctx, self -> cur_row, align_REF_POS);
601 
602         if ( FAILED () )
603         {
604             SYSTEM_ERROR ( xcIteratorUninitialized, "Failed to access REF_POS" );
605             return (uint64_t)-1;
606         }
607 
608         if ( idx_ref > ref_pos )
609         {
610             /* the alignment starts beyond given ref_pos
611                 out of bounds
612             */
613             ret = (uint64_t)-1;
614         }
615         else
616         {
617             for ( align_pos = 0, proj_len = 1; idx_ref < ref_pos && align_pos < read_len ; align_pos += proj_len )
618             {
619                 bool has_ref_offset = HAS_REF_OFFSET [ idx_HAS_REF_OFFSET++ ];
620                 if ( has_ref_offset == 0) /* match/mismatch */
621                 {
622                     ++idx_ref;
623                     proj_len = 1;
624                 }
625                 else /* indel */
626                 {
627                     int32_t ref_offset = REF_OFFSET [ idx_REF_OFFSET++ ];
628 
629                     if ( ref_offset < 0 )
630                     {
631                         /* insertion */
632                         proj_len = (uint32_t)-ref_offset;
633                         ++idx_ref;
634                     }
635                     else
636                     {
637                         /* deletion */
638                         assert ( ref_offset > 0 );
639 
640                         idx_ref += ref_offset;
641                         proj_len = 0;
642                     }
643                 }
644             }
645 
646             /* in the case we exited from the loop at the insertion, align_pos points beyond
647                the insertion - it should be restored to point to the beginning of the insertion
648             */
649             if ( proj_len > 1 )
650                 align_pos -= proj_len;
651 
652             if ( align_pos >= read_len )
653             {
654                 align_pos = -1;
655                 proj_len = 0;
656             }
657 
658             ret = ((uint64_t)align_pos << 32) | proj_len;
659         }
660     }
661 
662     return ret;
663 }
664 
CSRA1_AlignmentGetAlignmentLength(CSRA1_Alignment * self,ctx_t ctx)665 uint64_t CSRA1_AlignmentGetAlignmentLength( CSRA1_Alignment* self, ctx_t ctx )
666 {
667     FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
668     if ( ! self -> seen_first )
669     {
670         USER_ERROR ( xcIteratorUninitialized, "Alignment accessed before a call to AlignmentIteratorNext()" );
671         return 0;
672     }
673 
674     return NGS_CursorGetInt32 ( GetCursor ( self ), ctx, self -> cur_row, align_REF_LEN);
675 }
676 
CSRA1_AlignmentGetIsReversedOrientation(CSRA1_Alignment * self,ctx_t ctx)677 bool CSRA1_AlignmentGetIsReversedOrientation( CSRA1_Alignment* self, ctx_t ctx )
678 {
679     FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
680     if ( ! self -> seen_first )
681     {
682         USER_ERROR ( xcIteratorUninitialized, "Alignment accessed before a call to AlignmentIteratorNext()" );
683         return false;
684     }
685 
686     return NGS_CursorGetBool ( GetCursor ( self ), ctx, self -> cur_row, align_REF_ORIENTATION);
687 }
688 
CSRA1_AlignmentGetSoftClip(CSRA1_Alignment * self,ctx_t ctx,bool left)689 int CSRA1_AlignmentGetSoftClip( CSRA1_Alignment* self, ctx_t ctx, bool left )
690 {
691     FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
692     if ( ! self -> seen_first )
693     {
694         USER_ERROR ( xcIteratorUninitialized, "Alignment accessed before a call to AlignmentIteratorNext()" );
695         return 0;
696     }
697 
698     return NGS_CursorGetInt32 ( GetCursor ( self ), ctx, self -> cur_row, left ? align_LEFT_SOFT_CLIP : align_RIGHT_SOFT_CLIP );
699 }
700 
CSRA1_AlignmentGetTemplateLength(CSRA1_Alignment * self,ctx_t ctx)701 uint64_t CSRA1_AlignmentGetTemplateLength( CSRA1_Alignment* self, ctx_t ctx )
702 {
703     FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
704     if ( ! self -> seen_first )
705     {
706         USER_ERROR ( xcIteratorUninitialized, "Alignment accessed before a call to AlignmentIteratorNext()" );
707         return 0;
708     }
709 
710     return NGS_CursorGetInt32 ( GetCursor ( self ), ctx, self -> cur_row, align_TEMPLATE_LEN);
711 }
712 
CSRA1_AlignmentGetShortCigar(CSRA1_Alignment * self,ctx_t ctx,bool clipped)713 struct NGS_String* CSRA1_AlignmentGetShortCigar( CSRA1_Alignment* self, ctx_t ctx, bool clipped )
714 {
715     FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
716     if ( ! self -> seen_first )
717     {
718         USER_ERROR ( xcIteratorUninitialized, "Alignment accessed before a call to AlignmentIteratorNext()" );
719         return NULL;
720     }
721 
722     return NGS_CursorGetString ( GetCursor ( self ), ctx, self -> cur_row, clipped ? align_CLIPPED_CIGAR_SHORT: align_CIGAR_SHORT );
723 }
724 
CSRA1_AlignmentGetLongCigar(CSRA1_Alignment * self,ctx_t ctx,bool clipped)725 struct NGS_String* CSRA1_AlignmentGetLongCigar( CSRA1_Alignment* self, ctx_t ctx, bool clipped )
726 {
727     FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
728     if ( ! self -> seen_first )
729     {
730         USER_ERROR ( xcIteratorUninitialized, "Alignment accessed before a call to AlignmentIteratorNext()" );
731         return NULL;
732     }
733 
734     return NGS_CursorGetString ( GetCursor ( self ), ctx, self -> cur_row, clipped ? align_CLIPPED_CIGAR_LONG : align_CIGAR_LONG );
735 }
736 
CSRA1_AlignmentGetRNAOrientation(CSRA1_Alignment * self,ctx_t ctx)737 char CSRA1_AlignmentGetRNAOrientation( CSRA1_Alignment* self, ctx_t ctx )
738 {
739     FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
740     if ( ! self -> seen_first )
741     {
742         USER_ERROR ( xcIteratorUninitialized, "Alignment accessed before a call to AlignmentIteratorNext()" );
743     }
744     else
745     {
746         TRY ( char ret = NGS_CursorGetChar ( GetCursor ( self ), ctx, self -> cur_row, align_RNA_ORIENTATION ) )
747         {
748             return ret;
749         }
750         CATCH_ALL ()
751         {
752             CLEAR();
753         }
754     }
755     return '?';
756 }
757 
CSRA1_AlignmentHasMate(CSRA1_Alignment * self,ctx_t ctx)758 bool CSRA1_AlignmentHasMate( CSRA1_Alignment* self, ctx_t ctx )
759 {
760     FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
761     if ( ! self -> seen_first )
762     {
763         USER_WARNING ( xcIteratorUninitialized, "Alignment accessed before a call to AlignmentIteratorNext()" );
764         return false;
765     }
766 
767     TRY ( NGS_CursorGetInt64 ( GetCursor ( self ), ctx, self -> cur_row, align_MATE_ALIGN_ID ) )
768     {
769         int64_t mate_seq_spot_id;
770 
771         if ( self -> in_primary )
772             return true;
773 
774         TRY ( mate_seq_spot_id = NGS_CursorGetInt64 ( self -> secondary_curs, ctx, self -> cur_row, align_SEQ_SPOT_ID ) )
775         {
776             if ( mate_seq_spot_id > 0 )
777                 return true;
778         }
779     }
780 
781     CLEAR();
782 
783     return false;
784 }
785 
CSRA1_AlignmentGetMateAlignmentId(CSRA1_Alignment * self,ctx_t ctx)786 NGS_String * CSRA1_AlignmentGetMateAlignmentId( CSRA1_Alignment* self, ctx_t ctx )
787 {
788     int64_t mateId;
789     FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
790 
791     if ( ! self -> seen_first )
792     {
793         USER_ERROR ( xcIteratorUninitialized, "Alignment accessed before a call to AlignmentIteratorNext()" );
794         return 0;
795     }
796 
797     TRY ( mateId = NGS_CursorGetInt64 ( GetCursor ( self ), ctx, self -> cur_row, align_MATE_ALIGN_ID ) )
798     {
799         if ( ! self -> in_primary )
800         {
801             TRY ( int64_t mate_seq_spot_id = NGS_CursorGetInt64 ( self -> secondary_curs, ctx, mateId, align_SEQ_SPOT_ID ) )
802             {
803                 if ( mate_seq_spot_id <= 0 )
804                 {
805                     INTERNAL_ERROR ( xcSecondaryAlignmentMissingPrimary,
806                                      "secondary mate alignment id ( %li ) missing primary within %.*s",
807                                      mateId + self -> id_offset,
808                                      NGS_StringSize ( self -> run_name, ctx ),
809                                      NGS_StringData ( self -> run_name, ctx ) );
810                 }
811             }
812         }
813 
814         if ( ! FAILED () )
815         {
816             return NGS_IdMake ( ctx,
817                                 self -> run_name,
818                                 self -> in_primary ? NGSObject_PrimaryAlignment : NGSObject_SecondaryAlignment,
819                                 self -> in_primary ? mateId : mateId + self -> id_offset );
820         }
821     }
822     return NULL;
823 }
824 
CSRA1_AlignmentGetMateReferenceSpec(CSRA1_Alignment * self,ctx_t ctx)825 struct NGS_String* CSRA1_AlignmentGetMateReferenceSpec( CSRA1_Alignment* self, ctx_t ctx )
826 {
827     FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
828     if ( ! self -> seen_first )
829     {
830         USER_ERROR ( xcIteratorUninitialized, "Alignment accessed before a call to AlignmentIteratorNext()" );
831         return NULL;
832     }
833 
834     {
835         NGS_String* ret;
836         TRY ( ret = NGS_CursorGetString ( GetCursor ( self ), ctx, self -> cur_row, align_MATE_REF_SEQ_ID) )
837         {
838             return ret;
839         }
840         if ( (int)GetRCObject ( ctx -> rc ) == rcColumn && GetRCState ( ctx -> rc ) == rcNotFound )
841         {
842             CLEAR ();
843             return NGS_CursorGetString ( GetCursor ( self ), ctx, self -> cur_row, align_MATE_REF_NAME );
844         }
845     }
846     return NULL;
847 }
848 
CSRA1_AlignmentGetMateIsReversedOrientation(CSRA1_Alignment * self,ctx_t ctx)849 bool CSRA1_AlignmentGetMateIsReversedOrientation( CSRA1_Alignment* self, ctx_t ctx )
850 {
851     FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
852     if ( ! self -> seen_first )
853     {
854         USER_ERROR ( xcIteratorUninitialized, "Alignment accessed before a call to AlignmentIteratorNext()" );
855         return false;
856     }
857 
858     return NGS_CursorGetBool ( GetCursor ( self ), ctx, self -> cur_row, align_MATE_REF_ORIENTATION);
859 }
860 
CSRA1_AlignmentIsFirst(CSRA1_Alignment * self,ctx_t ctx)861 bool CSRA1_AlignmentIsFirst( CSRA1_Alignment* self, ctx_t ctx )
862 {
863     FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
864     int32_t seq_read_id;
865 
866     assert ( self );
867     if ( ! self -> seen_first )
868     {
869         USER_ERROR ( xcIteratorUninitialized, "Alignment accessed before a call to AlignmentIteratorNext()" );
870         return false;
871     }
872 
873     TRY ( seq_read_id = NGS_CursorGetInt64 ( GetCursor ( self ), ctx, self -> cur_row, align_SEQ_READ_ID ) )
874     {
875         return seq_read_id == 1;
876     }
877     return false;
878 }
879 
880 
881 /*--------------------------------------------------------------------------
882  * NGS_AlignmentIterator
883  */
884 static
CSRA1_AlignmentIteratorNext(CSRA1_Alignment * self,ctx_t ctx)885 bool CSRA1_AlignmentIteratorNext ( CSRA1_Alignment* self, ctx_t ctx )
886 {
887     assert ( self != NULL );
888 
889     if ( !self -> seen_first )
890     {
891         self -> seen_first = true;
892     }
893     else
894     {
895         ++ self -> cur_row;
896     }
897 
898     for ( ; self -> cur_row < self -> row_max; ++ self -> cur_row )
899     {
900         int64_t seq_spot_id;
901 
902         if ( self -> in_primary )
903             return true;
904 
905         TRY ( seq_spot_id = NGS_CursorGetInt64 ( self -> secondary_curs, ctx, self -> cur_row, align_SEQ_SPOT_ID ) )
906         {
907             if ( seq_spot_id > 0 )
908                 return true;
909         }
910 
911         CLEAR ();
912     }
913 
914     /* see if we need to switch over to the next cursor */
915     if ( self -> in_primary && self -> secondary_curs != NULL )
916     {
917         self -> in_primary = false;
918 
919         self -> cur_row = self -> secondary_start;
920         self -> row_max = self -> secondary_max;
921 
922         /* let's re-run "next" again to check SEQ_SPOT_ID */
923         self -> seen_first = false;
924         return CSRA1_AlignmentIteratorNext ( self, ctx );
925     }
926 
927     return false;
928 }
929 
930 static CSRA1_Alignment* CSRA1_AlignmentGetMateAlignment( CSRA1_Alignment* self, ctx_t ctx );
931 
932 static
CSRA1_FragmentGetId(CSRA1_Alignment * self,ctx_t ctx)933 NGS_String* CSRA1_FragmentGetId ( CSRA1_Alignment * self, ctx_t ctx )
934 {
935     FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
936 
937     assert ( self != NULL );
938     if ( ! self -> seen_first )
939     {
940         USER_ERROR ( xcIteratorUninitialized, "Alignment accessed before a call to AlignmentIteratorNext()" );
941         return NULL;
942     }
943 
944     {
945         TRY ( int32_t fragId = NGS_CursorGetInt32 ( GetCursor ( self ), ctx, self -> cur_row, align_SEQ_READ_ID ) )
946         {
947             return NGS_IdMakeFragment ( ctx, self -> run_name, true, self -> cur_row, fragId - 1 );
948         }
949     }
950     return NULL;
951 }
952 
953 static
CSRA1_FragmentGetSequence(CSRA1_Alignment * self,ctx_t ctx,uint64_t offset,uint64_t length)954 struct NGS_String * CSRA1_FragmentGetSequence ( CSRA1_Alignment * self, ctx_t ctx, uint64_t offset, uint64_t length )
955 {
956     FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
957     NGS_String * seq;
958 
959     if ( ! self -> seen_first )
960     {
961         USER_ERROR ( xcIteratorUninitialized, "Alignment accessed before a call to AlignmentIteratorNext()" );
962         return NULL;
963     }
964 
965     TRY ( seq = NGS_CursorGetString ( GetCursor ( self ), ctx, self -> cur_row, align_RAW_READ ) )
966     {
967         TRY ( NGS_String * sub = NGS_StringSubstrOffsetSize ( seq, ctx, offset, length ) )
968         {
969             NGS_StringRelease ( seq, ctx );
970             seq = sub;
971         }
972     }
973 
974     return seq;
975 }
976 
977 static
CSRA1_FragmentGetQualities(CSRA1_Alignment * self,ctx_t ctx,uint64_t offset,uint64_t length)978 struct NGS_String * CSRA1_FragmentGetQualities ( CSRA1_Alignment * self, ctx_t ctx, uint64_t offset, uint64_t length )
979 {
980     FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
981     NGS_String * ret = NULL;
982 
983     if ( ! self -> seen_first )
984     {
985         USER_ERROR ( xcIteratorUninitialized, "Alignment accessed before a call to AlignmentIteratorNext()" );
986     }
987     else
988     {
989         const void * base;
990         uint32_t elem_bits, boff, row_len;
991         TRY ( NGS_CursorCellDataDirect ( GetCursor ( self ), ctx, self -> cur_row, align_QUALITY, & elem_bits, & base, & boff, & row_len ) )
992         {
993             assert ( elem_bits == 8 );
994             assert ( boff == 0 );
995 
996             if ( offset > row_len )
997             {
998                 length = 0;
999             }
1000             else if ( offset + length > row_len )
1001             {
1002                 length = row_len - offset;
1003             }
1004 
1005             {   /* convert to ascii-33 */
1006                 char * copy = malloc ( length + 1 );
1007                 if ( copy == NULL )
1008                     SYSTEM_ERROR ( xcNoMemory, "allocating %u bytes for QUALITY row %ld", row_len + 1, self -> cur_row );
1009                 else
1010                 {
1011                     uint32_t i;
1012                     const uint8_t * orig = base;
1013                     for ( i = 0; i < length; ++ i )
1014                     {
1015                         copy [ i ] = ( char ) ( orig [ offset + i ] + 33 );
1016                     }
1017                     copy [ length ] = 0;
1018 
1019                     ret = NGS_StringMakeOwned ( ctx, copy, length );
1020                     if ( FAILED () )
1021                     {
1022                         free ( copy );
1023                     }
1024                 }
1025             }
1026         }
1027     }
1028 
1029     return ret;
1030 }
1031 
1032 static
CSRA1_FragmentIsPaired(CSRA1_Alignment * self,ctx_t ctx)1033 bool CSRA1_FragmentIsPaired ( CSRA1_Alignment * self, ctx_t ctx )
1034 {
1035     FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
1036 
1037     int64_t id;
1038     int32_t idx;
1039     bool ret = false;
1040 
1041     if ( ! self -> seen_first )
1042     {
1043         USER_ERROR ( xcIteratorUninitialized, "Alignment accessed before a call to AlignmentIteratorNext()" );
1044         return false;
1045     }
1046 
1047     TRY ( id = NGS_CursorGetInt64 ( GetCursor ( self ), ctx, self -> cur_row, align_MATE_ALIGN_ID ) )
1048     {
1049     }
1050     CATCH_ALL ()
1051     {
1052         /* if we failed, it means that MATE_ALIGN_ID column is empty, so lets assign it to 0 and move forward */
1053         CLEAR();
1054         id = 0;
1055     }
1056 
1057     /* if MATE_ALIGN_ID != 0, it's paired */
1058     if ( id != 0 )
1059         return true;
1060 
1061     TRY ( idx = NGS_CursorGetInt32 ( GetCursor ( self ), ctx, self -> cur_row, align_SEQ_READ_ID ) )
1062     {
1063         /* if SEQ_READ_ID > 1, it's paired. */
1064         if ( idx > 1 )
1065             return true;
1066 
1067         TRY ( id = NGS_CursorGetInt64 ( GetCursor ( self ), ctx, self -> cur_row, align_SEQ_SPOT_ID ) )
1068         {
1069             NGS_String * readId;
1070 
1071             /* otherwise, have to get spot id and consult SEQUENCE table */
1072             TRY ( readId = NGS_IdMake ( ctx, self -> run_name, NGSObject_Read, id ) )
1073             {
1074                 const char * readIdStr = NGS_StringData ( readId, ctx );
1075                 TRY ( NGS_Read * read = NGS_ReadCollectionGetRead ( ( NGS_ReadCollection * ) self -> coll, ctx, readIdStr ) )
1076                 {
1077                     uint32_t numFragments = NGS_ReadNumFragments(read, ctx);
1078                     ret = numFragments > 1;
1079 
1080                     NGS_ReadRelease ( read, ctx );
1081                 }
1082 
1083                 NGS_StringRelease ( readId, ctx );
1084             }
1085         }
1086     }
1087 
1088     return ret;
1089 }
1090 
1091 static
CSRA1_FragmentIsAligned(CSRA1_Alignment * self,ctx_t ctx)1092 bool CSRA1_FragmentIsAligned ( CSRA1_Alignment * self, ctx_t ctx )
1093 {
1094     assert ( self != NULL );
1095     return true;
1096 }
1097 
1098 static
CSRA1_FragmentNext(CSRA1_Alignment * self,ctx_t ctx)1099 bool CSRA1_FragmentNext ( CSRA1_Alignment * self, ctx_t ctx )
1100 {
1101     FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
1102     if ( ! self -> seen_first )
1103     {
1104         USER_ERROR ( xcIteratorUninitialized, "Alignment accessed before a call to AlignmentIteratorNext()" );
1105         return false;
1106     }
1107 
1108     UNIMPLEMENTED(); /* CSRA1_FragmentNext; should not be called - Alignment is not a FragmentIterator */
1109 
1110     return false;
1111 }
1112 
1113 
1114 static NGS_Alignment_vt CSRA1_Alignment_vt_inst =
1115 {
1116     {
1117         {
1118             /* NGS_Refcount */
1119             CSRA1_AlignmentWhack
1120         },
1121 
1122         /* NGS_Fragment */
1123         CSRA1_FragmentGetId,
1124         CSRA1_FragmentGetSequence,
1125         CSRA1_FragmentGetQualities,
1126         CSRA1_FragmentIsPaired,
1127         CSRA1_FragmentIsAligned,
1128         CSRA1_FragmentNext
1129     },
1130 
1131     CSRA1_AlignmentGetAlignmentId,
1132     CSRA1_AlignmentGetReferenceSpec,
1133     CSRA1_AlignmentGetMappingQuality,
1134     CSRA1_AlignmentGetReadFilter,
1135     CSRA1_AlignmentGetReferenceBases,
1136     CSRA1_AlignmentGetReadGroup,
1137     CSRA1_AlignmentGetReadId,
1138     CSRA1_AlignmentGetClippedFragmentBases,
1139     CSRA1_AlignmentGetClippedFragmentQualities,
1140     CSRA1_AlignmentGetAlignedFragmentBases,
1141     CSRA1_AlignmentIsPrimary,
1142     CSRA1_AlignmentGetAlignmentPosition,
1143     CSRA1_AlignmentGetReferencePositionProjectionRange,
1144     CSRA1_AlignmentGetAlignmentLength,
1145     CSRA1_AlignmentGetIsReversedOrientation,
1146     CSRA1_AlignmentGetSoftClip,
1147     CSRA1_AlignmentGetTemplateLength,
1148     CSRA1_AlignmentGetShortCigar,
1149     CSRA1_AlignmentGetLongCigar,
1150     CSRA1_AlignmentGetRNAOrientation,
1151     CSRA1_AlignmentHasMate,
1152     CSRA1_AlignmentGetMateAlignmentId,
1153     CSRA1_AlignmentGetMateAlignment,
1154     CSRA1_AlignmentGetMateReferenceSpec,
1155     CSRA1_AlignmentGetMateIsReversedOrientation,
1156     CSRA1_AlignmentIsFirst,
1157 
1158     /* Iterator */
1159     CSRA1_AlignmentIteratorNext
1160 };
1161 
1162 /* Init
1163  */
1164 static
CSRA1_AlignmentInit(NGS_ALIGNMENT * ref,ctx_t ctx,struct CSRA1_ReadCollection * coll,const char * clsname,const char * instname,const char * run_name,size_t run_name_size,bool exclusive,bool primary,bool secondary,uint64_t id_offset)1165 void CSRA1_AlignmentInit ( NGS_ALIGNMENT* ref,
1166                            ctx_t ctx,
1167                            struct CSRA1_ReadCollection * coll,
1168                            const char *clsname,
1169                            const char *instname,
1170                            const char * run_name, size_t run_name_size,
1171                            bool exclusive,
1172                            bool primary,
1173                            bool secondary,
1174                            uint64_t id_offset )
1175 {
1176     FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcConstructing );
1177 
1178     if ( ref == NULL )
1179         INTERNAL_ERROR ( xcParamNull, "bad object reference" );
1180     else
1181     {
1182         TRY ( NGS_AlignmentInit ( ctx, ref, & CSRA1_Alignment_vt_inst, clsname, instname ) )
1183         {
1184             if ( primary  )
1185             {
1186                 ON_FAIL ( ref -> primary_curs = CSRA1_ReadCollectionMakeAlignmentCursor ( coll,
1187                                                                                           ctx,
1188                                                                                           true,
1189                                                                                           exclusive ) )
1190                     return;
1191                 ref -> in_primary   = true;
1192             }
1193 
1194             if ( secondary )
1195             {
1196                 ON_FAIL ( ref -> secondary_curs = CSRA1_ReadCollectionMakeAlignmentCursor ( coll,
1197                                                                                             ctx,
1198                                                                                             false,
1199                                                                                             exclusive ) )
1200                     CLEAR(); /* missing SECONDARY_ALIGNMENTS table is OK */
1201             }
1202 
1203             ref -> id_offset = id_offset;
1204 
1205             ON_FAIL ( ref -> coll = CSRA1_ReadCollectionDuplicate ( coll, ctx ) )
1206                 return;
1207             ON_FAIL ( ref -> run_name = NGS_StringMakeCopy ( ctx, run_name, run_name_size ) )
1208                 return;
1209         }
1210     }
1211 }
1212 
1213 
1214 static
SetRowId(CSRA1_Alignment * self,ctx_t ctx,int64_t rowId,bool primary)1215 void SetRowId ( CSRA1_Alignment* self, ctx_t ctx, int64_t rowId, bool primary )
1216 {   /* validate the requested rowId */
1217     if ( rowId <= 0 )
1218     {
1219         INTERNAL_ERROR ( xcCursorAccessFailed,
1220                          "rowId ( %li ) out of range for %.*s",
1221                          rowId,
1222                          NGS_StringSize ( self -> run_name, ctx ),
1223                          NGS_StringData ( self -> run_name, ctx ) );
1224     }
1225     else
1226     {
1227         int64_t id = rowId;
1228         int64_t  start = 0;
1229         uint64_t count = 0;
1230 
1231         if ( primary )
1232         {
1233             if ( self -> primary_curs != NULL )
1234             {
1235                 ON_FAIL ( NGS_CursorGetRowRange ( self -> primary_curs, ctx, & start, & count ) )
1236                     return;
1237             }
1238         }
1239         else if ( self -> secondary_curs != NULL )
1240         {
1241             ON_FAIL ( NGS_CursorGetRowRange ( self -> secondary_curs, ctx, & start, & count ) )
1242                 return;
1243             id -= self -> id_offset;
1244         }
1245 
1246         if ( (uint64_t)id >= start + count )
1247         {
1248             INTERNAL_ERROR ( xcCursorAccessFailed,
1249                              "rowId ( %li ) out of range for %.*s",
1250                              rowId,
1251                              NGS_StringSize ( self -> run_name, ctx ),
1252                              NGS_StringData ( self -> run_name, ctx ) );
1253         }
1254         else
1255         {
1256             if ( ! primary && self -> secondary_curs != NULL )
1257             {
1258                 TRY ( int64_t spot_id = NGS_CursorGetInt64 ( self -> secondary_curs, ctx, id, align_SEQ_SPOT_ID ) )
1259                 {
1260                     if ( spot_id <= 0 )
1261                     {
1262                         INTERNAL_ERROR ( xcSecondaryAlignmentMissingPrimary,
1263                                          "secondary alignment id ( %li ) missing primary within %.*s",
1264                                          rowId,
1265                                          NGS_StringSize ( self -> run_name, ctx ),
1266                                          NGS_StringData ( self -> run_name, ctx ) );
1267                     }
1268                 }
1269             }
1270 
1271             if ( ! FAILED () )
1272             {
1273                 self -> cur_row = id;
1274                 self -> row_max = id + 1;
1275             }
1276         }
1277     }
1278 }
1279 
1280 /* Make
1281  *  makes a common alignment from VCursor
1282  */
CSRA1_AlignmentMake(ctx_t ctx,struct CSRA1_ReadCollection * coll,int64_t alignId,char const * run_name,size_t run_name_size,bool primary,uint64_t id_offset)1283 NGS_Alignment * CSRA1_AlignmentMake ( ctx_t ctx,
1284                                         struct CSRA1_ReadCollection * coll,
1285                                         int64_t alignId,
1286                                         char const* run_name, size_t run_name_size,
1287                                         bool primary,
1288                                         uint64_t id_offset )
1289 {
1290     FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcConstructing );
1291 
1292     CSRA1_Alignment * ref;
1293 
1294     ref = calloc ( 1, sizeof * ref );
1295     if ( ref == NULL )
1296         SYSTEM_ERROR ( xcNoMemory,
1297                        "allocating CSRA1_Alignment(%lu) on '%.*s'",
1298                        alignId,
1299                        run_name_size,
1300                        run_name );
1301     else
1302     {
1303 #if _DEBUGGING
1304         char instname [ 256 ];
1305         string_printf ( instname,
1306                         sizeof instname,
1307                         NULL,
1308                         "%.*s(%lu)",
1309                         run_name_size,
1310                         run_name,
1311                         alignId );
1312         instname [ sizeof instname - 1 ] = 0;
1313 #else
1314         const char *instname = "";
1315 #endif
1316         TRY ( CSRA1_AlignmentInit ( ref, ctx, coll, "CSRA1_Alignment", instname, run_name, run_name_size, false, primary, ! primary, id_offset ) )
1317         {
1318             TRY ( SetRowId( ref, ctx, alignId, primary ) )
1319             {
1320                 ref -> seen_first = true;
1321                 return ( NGS_Alignment * ) ref;
1322             }
1323             CSRA1_AlignmentWhack ( ref, ctx );
1324         }
1325         free ( ref );
1326     }
1327 
1328     return NULL;
1329 }
1330 
CSRA1_AlignmentGetMateAlignment(CSRA1_Alignment * self,ctx_t ctx)1331 CSRA1_Alignment* CSRA1_AlignmentGetMateAlignment( CSRA1_Alignment* self, ctx_t ctx )
1332 {
1333     FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
1334     TRY ( int64_t mate_row = NGS_CursorGetInt64 ( GetCursor ( self ), ctx, self -> cur_row, align_MATE_ALIGN_ID ) )
1335     {
1336         if ( ! self -> in_primary )
1337         {
1338             mate_row += self -> id_offset;
1339         }
1340 
1341         {
1342             TRY ( NGS_String * mate_id = NGS_IdMake( ctx,
1343                                                      self -> run_name,
1344                                                      self -> in_primary ? NGSObject_PrimaryAlignment : NGSObject_SecondaryAlignment,
1345                                                      mate_row ) )
1346             {
1347                 CSRA1_Alignment* ret = (CSRA1_Alignment*)
1348                     NGS_ReadCollectionGetAlignment ( CSRA1_ReadCollectionToNGS_ReadCollection ( self -> coll, ctx ),
1349                                                      ctx,
1350                                                      NGS_StringData ( mate_id, ctx ) );
1351                 NGS_StringRelease (mate_id, ctx );
1352                 return ret;
1353             }
1354         }
1355     }
1356     return NULL;
1357 }
1358 
CSRA1_AlignmentIteratorMake(ctx_t ctx,struct CSRA1_ReadCollection * coll,bool primary,bool secondary,const NGS_String * run_name,uint64_t id_offset)1359 NGS_Alignment * CSRA1_AlignmentIteratorMake ( ctx_t ctx,
1360                                               struct CSRA1_ReadCollection * coll,
1361                                               bool primary,
1362                                               bool secondary,
1363                                               const NGS_String * run_name,
1364                                               uint64_t id_offset )
1365 {
1366     FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcConstructing );
1367 
1368     CSRA1_Alignment * ref;
1369 
1370     ref = calloc ( 1, sizeof * ref );
1371     if ( ref == NULL )
1372         SYSTEM_ERROR ( xcNoMemory,
1373                        "allocating NGS_AlignmentIterator on '%.*s'",
1374                        NGS_StringSize ( run_name, ctx ),
1375                        NGS_StringData ( run_name, ctx ) );
1376     else
1377     {
1378 #if _DEBUGGING
1379         char instname [ 256 ];
1380         string_printf ( instname,
1381                         sizeof instname,
1382                         NULL,
1383                         "%.*s",
1384                         NGS_StringSize ( run_name, ctx ),
1385                         NGS_StringData ( run_name, ctx ) );
1386         instname [ sizeof instname - 1 ] = 0;
1387 #else
1388         const char *instname = "";
1389 #endif
1390         TRY ( CSRA1_AlignmentInit ( ref, ctx, coll, "NGS_AlignmentIterator", instname, NGS_StringData (run_name, ctx), NGS_StringSize (run_name, ctx), true, primary, secondary, id_offset ) )
1391         {
1392             TRY ( CSRA1_AlignmentInitRegion ( ref, ctx, ref -> primary_curs, ref -> secondary_curs, 0, ULLONG_MAX ) )
1393             {
1394                 return ( NGS_Alignment * ) ref;
1395             }
1396             CSRA1_AlignmentWhack ( ref, ctx );
1397         }
1398 
1399         free ( ref );
1400     }
1401 
1402     return NULL;
1403 }
1404 
CSRA1_AlignmentRangeMake(ctx_t ctx,struct CSRA1_ReadCollection * coll,bool primary,bool secondary,const NGS_String * run_name,uint64_t id_offset,int64_t first,uint64_t count)1405 NGS_Alignment * CSRA1_AlignmentRangeMake ( ctx_t ctx,
1406                                            struct CSRA1_ReadCollection * coll,
1407                                            bool primary,
1408                                            bool secondary,
1409                                            const NGS_String * run_name,
1410                                            uint64_t id_offset,
1411                                            int64_t first,
1412                                            uint64_t count)
1413 {
1414     FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcConstructing );
1415 
1416     CSRA1_Alignment * ref;
1417 
1418     ref = calloc ( 1, sizeof * ref );
1419     if ( ref == NULL )
1420         SYSTEM_ERROR ( xcNoMemory,
1421                        "allocating NGS_AlignmentRange on '%.*s'",
1422                        NGS_StringSize ( run_name, ctx ),
1423                        NGS_StringData ( run_name, ctx ) );
1424     else
1425     {
1426 #if _DEBUGGING
1427         char instname [ 256 ];
1428         string_printf ( instname,
1429                         sizeof instname,
1430                         NULL,
1431                         "%.*s",
1432                         NGS_StringSize ( run_name, ctx ),
1433                         NGS_StringData ( run_name, ctx ) );
1434         instname [ sizeof instname - 1 ] = 0;
1435 #else
1436         const char *instname = "";
1437 #endif
1438         TRY ( CSRA1_AlignmentInit ( ref, ctx, coll, "NGS_AlignmentRange", instname, NGS_StringData( run_name, ctx ), NGS_StringSize( run_name, ctx ), true, primary, secondary, id_offset ) )
1439         {
1440             TRY ( CSRA1_AlignmentInitRegion ( ref, ctx, ref -> primary_curs, ref -> secondary_curs, first, count ) )
1441             {
1442                 return ( NGS_Alignment * ) ref;
1443             }
1444             CSRA1_AlignmentWhack ( ref, ctx );
1445         }
1446 
1447         free ( ref );
1448     }
1449 
1450     return NULL;
1451 }
1452 
1453