1 /*===========================================================================
2 *
3 * PUBLIC DOMAIN NOTICE
4 * National Center for Biotechnology Information
5 *
6 * This software/database is a "United States Government Work" under the
7 * terms of the United States Copyright Act. It was written as part of
8 * the author's official duties as a United States Government employee and
9 * thus cannot be copyrighted. This software/database is freely available
10 * to the public for use. The National Library of Medicine and the U.S.
11 * Government have not placed any restriction on its use or reproduction.
12 *
13 * Although all reasonable efforts have been taken to ensure the accuracy
14 * and reliability of the software and data, the NLM and the U.S.
15 * Government do not and cannot warrant the performance or results that
16 * may be obtained by using this software or data. The NLM and the U.S.
17 * Government disclaim all warranties, express or implied, including
18 * warranties of performance, merchantability or fitness for any particular
19 * purpose.
20 *
21 * Please cite the author in any work or product based on this material.
22 *
23 * ===========================================================================
24 *
25 */
26
27 #include "CSRA1_ReferenceWindow.h"
28
29 typedef struct CSRA1_ReferenceWindow CSRA1_ReferenceWindow;
30 #define NGS_ALIGNMENT CSRA1_ReferenceWindow
31
32 #include "NGS_Alignment.h"
33 #include "NGS_ReadCollection.h"
34 #include "NGS_Refcount.h"
35
36 #include "NGS_Cursor.h"
37 #include "NGS_String.h"
38 #include "NGS_Id.h"
39
40 #include "CSRA1_Reference.h"
41 #include "CSRA1_Alignment.h"
42
43 #include <sysalloc.h>
44
45 #include <vdb/cursor.h>
46 #include <vdb/vdb-priv.h>
47
48 #include <klib/printf.h>
49 #include <klib/rc.h>
50 #include <klib/sort.h>
51
52 #include <kfc/ctx.h>
53 #include <kfc/rsrc.h>
54 #include <kfc/except.h>
55 #include <kfc/xc.h>
56
57 #include <limits.h>
58
59 #ifndef min
60 # define min(a,b) ( (a) < (b) ? (a) : (b) )
61 #endif
62
63 /*--------------------------------------------------------------------------
64 * CSRA1_ReferenceWindow
65 */
66 enum
67 {
68 Primary = 0,
69 Secondary = 1
70 };
71 struct AlignmentInfo
72 {
73 int64_t id;
74 /* sort order */
75 int64_t pos; /* asc */
76 uint64_t len; /* desc */
77 int8_t cat; /* prim, sec */
78 int32_t mapq; /* desc */
79 };
80 typedef struct AlignmentInfo AlignmentInfo;
81
82 struct CSRA1_ReferenceWindow
83 {
84 NGS_Refcount dad;
85 NGS_ReadCollection * coll;
86
87 const NGS_Cursor * reference_curs;
88
89 bool circular;
90 bool primary;
91 bool secondary;
92 uint32_t filters; /* uses NGS_AlignmentFilterBits from NGS_Alignment.h */
93 int32_t map_qual;
94
95 uint32_t chunk_size;
96 uint64_t ref_length; /* total reference length in bases */
97 uint64_t id_offset;
98
99 /* remaining range of chunks in the reference table */
100 int64_t ref_begin;
101 int64_t ref_end;
102
103 /* for use in a slice iterator: */
104 /* slice (0, 0) = all */
105 uint64_t slice_offset;
106 uint64_t slice_size; /* 0 = the rest of the reference */
107 /* starting chunks for primary/secondary tables */
108 int64_t ref_primary_begin;
109 int64_t ref_secondary_begin;
110
111 /* false - not positioned on any chunk */
112 bool seen_first;
113
114 /* alignments against current chunk, sorted in canonical order */
115 AlignmentInfo* align_info;
116 size_t align_info_cur;
117 size_t align_info_total;
118 NGS_Alignment* cur_align; /* cached current alignment, corresponds to align_info_cur */
119 };
120
121 /* access to filter bits
122 * NB - the polarity of "pass-bad" and "pass-dups" has been inverted,
123 * making these same bits now mean "drop-bad" and "drop-dups". this
124 * was done so that the bits can be tested for non-zero to indicate
125 * need to apply filters.
126 */
127 #define CSRA1_ReferenceWindowFilterDropBad( self ) \
128 ( ( ( self ) -> filters & NGS_AlignmentFilterBits_pass_bad ) != 0 )
129 #define CSRA1_ReferenceWindowFilterDropDups( self ) \
130 ( ( ( self ) -> filters & NGS_AlignmentFilterBits_pass_dups ) != 0 )
131 #define CSRA1_ReferenceWindowFilterMinMapQual( self ) \
132 ( ( ( self ) -> filters & NGS_AlignmentFilterBits_min_map_qual ) != 0 )
133 #define CSRA1_ReferenceWindowFilterMaxMapQual( self ) \
134 ( ( ( self ) -> filters & NGS_AlignmentFilterBits_max_map_qual ) != 0 )
135 #define CSRA1_ReferenceWindowFilterMapQual( self ) \
136 ( ( ( self ) -> filters & NGS_AlignmentFilterBits_map_qual ) != 0 )
137 #define CSRA1_ReferenceWindowFilterNoWraparound( self ) \
138 ( ( ( self ) -> filters & NGS_AlignmentFilterBits_no_wraparound ) != 0 )
139 #define CSRA1_ReferenceWindowFilterStartWithinWindow( self ) \
140 ( ( ( self ) -> filters & NGS_AlignmentFilterBits_start_within_window ) != 0 )
141
142 #define NGS_AlignmentFilterBits_prop_mask \
143 ( NGS_AlignmentFilterBits_pass_bad | NGS_AlignmentFilterBits_pass_dups | NGS_AlignmentFilterBits_map_qual )
144
145
146 /* Whack
147 */
148 static
CSRA1_ReferenceWindowWhack(CSRA1_ReferenceWindow * self,ctx_t ctx)149 void CSRA1_ReferenceWindowWhack ( CSRA1_ReferenceWindow * self, ctx_t ctx )
150 {
151 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcDestroying );
152
153 NGS_AlignmentRelease ( self -> cur_align, ctx );
154 free ( self -> align_info );
155 NGS_CursorRelease ( self -> reference_curs, ctx );
156 NGS_RefcountRelease ( & self -> coll -> dad, ctx );
157 }
158
159 static
GetAlignment(CSRA1_ReferenceWindow * self,ctx_t ctx)160 NGS_Alignment* GetAlignment ( CSRA1_ReferenceWindow* self, ctx_t ctx )
161 {
162 if ( self -> seen_first &&
163 ( self -> circular || self -> ref_begin < self ->ref_end ) && /* for circular references, all chunks are loaded at once */
164 self -> align_info_cur < self -> align_info_total )
165 {
166 if ( self -> cur_align == NULL )
167 {
168 TRY ( NGS_String * run_name = NGS_ReadCollectionGetName ( self -> coll, ctx ) )
169 {
170 TRY ( const NGS_String * id = NGS_IdMake ( ctx,
171 run_name,
172 self -> align_info [ self -> align_info_cur ] . cat == Primary ?
173 NGSObject_PrimaryAlignment:
174 NGSObject_SecondaryAlignment,
175 self -> align_info [ self -> align_info_cur ] . id /* + self -> id_offset ? */) )
176 {
177 self -> cur_align = NGS_ReadCollectionGetAlignment ( self -> coll, ctx, NGS_StringData ( id, ctx ) );
178 NGS_StringRelease ( id, ctx );
179 }
180 NGS_StringRelease ( run_name, ctx );
181 }
182 }
183 return self -> cur_align;
184 }
185 USER_ERROR ( xcIteratorUninitialized, "Invalid alignment" );
186 return NULL;
187 }
188
189
190 static
CSRA1_FragmentGetId(CSRA1_ReferenceWindow * self,ctx_t ctx)191 NGS_String* CSRA1_FragmentGetId ( CSRA1_ReferenceWindow * self, ctx_t ctx )
192 {
193 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
194
195 TRY ( NGS_Alignment* ref = GetAlignment ( self, ctx ) )
196 {
197 return NGS_FragmentGetId ( (NGS_Fragment*)ref, ctx );
198 }
199 return NULL;
200 }
201
202 static
CSRA1_FragmentGetSequence(CSRA1_ReferenceWindow * self,ctx_t ctx,uint64_t offset,uint64_t length)203 struct NGS_String * CSRA1_FragmentGetSequence ( CSRA1_ReferenceWindow * self, ctx_t ctx, uint64_t offset, uint64_t length )
204 {
205 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
206 TRY ( NGS_Alignment* ref = GetAlignment ( self, ctx ) )
207 {
208 return NGS_FragmentGetSequence ( (NGS_Fragment*)ref, ctx, offset, length );
209 }
210 return NULL;
211 }
212
213 static
CSRA1_FragmentGetQualities(CSRA1_ReferenceWindow * self,ctx_t ctx,uint64_t offset,uint64_t length)214 struct NGS_String * CSRA1_FragmentGetQualities ( CSRA1_ReferenceWindow * self, ctx_t ctx, uint64_t offset, uint64_t length )
215 {
216 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
217 TRY ( NGS_Alignment* ref = GetAlignment ( self, ctx ) )
218 {
219 return NGS_FragmentGetQualities ( (NGS_Fragment*)ref, ctx, offset, length );
220 }
221 return NULL;
222 }
223
224 static
CSRA1_FragmentIsPaired(CSRA1_ReferenceWindow * self,ctx_t ctx)225 bool CSRA1_FragmentIsPaired ( CSRA1_ReferenceWindow * self, ctx_t ctx )
226 {
227 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
228 TRY ( NGS_Alignment* ref = GetAlignment ( self, ctx ) )
229 {
230 return NGS_FragmentIsPaired ( (NGS_Fragment*)ref, ctx );
231 }
232 return false;
233 }
234
235 static
CSRA1_FragmentIsAligned(CSRA1_ReferenceWindow * self,ctx_t ctx)236 bool CSRA1_FragmentIsAligned ( CSRA1_ReferenceWindow * self, ctx_t ctx )
237 {
238 assert ( self != NULL );
239 return true;
240 }
241
242 static
CSRA1_FragmentNext(CSRA1_ReferenceWindow * self,ctx_t ctx)243 bool CSRA1_FragmentNext ( CSRA1_ReferenceWindow * self, ctx_t ctx )
244 {
245 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
246 UNIMPLEMENTED(); /* CSRA1_FragmentNext; should not be called - Alignment is not a FragmentIterator */
247 return false;
248 }
249
250 static
CSRA1_ReferenceWindowGetAlignmentId(CSRA1_ReferenceWindow * self,ctx_t ctx)251 NGS_String * CSRA1_ReferenceWindowGetAlignmentId( CSRA1_ReferenceWindow* self, ctx_t ctx )
252 {
253 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
254
255 TRY ( NGS_Alignment* ref = GetAlignment ( self, ctx ) )
256 {
257 return NGS_AlignmentGetAlignmentId ( ref, ctx );
258 }
259 return NULL;
260 }
261
262 static
CSRA1_ReferenceWindowGetReferenceSpec(CSRA1_ReferenceWindow * self,ctx_t ctx)263 struct NGS_String* CSRA1_ReferenceWindowGetReferenceSpec( CSRA1_ReferenceWindow* self, ctx_t ctx )
264 {
265 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
266
267 TRY ( NGS_Alignment* ref = GetAlignment ( self, ctx ) )
268 {
269 return NGS_AlignmentGetReferenceSpec ( ref, ctx );
270 }
271 return NULL;
272 }
273
274 static
CSRA1_ReferenceWindowGetMappingQuality(CSRA1_ReferenceWindow * self,ctx_t ctx)275 int CSRA1_ReferenceWindowGetMappingQuality( CSRA1_ReferenceWindow* self, ctx_t ctx )
276 {
277 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
278
279 TRY ( NGS_Alignment* ref = GetAlignment ( self, ctx ) )
280 {
281 return NGS_AlignmentGetMappingQuality ( ref, ctx );
282 }
283 return 0;
284 }
285
286 static
CSRA1_ReferenceWindowGetReadFilter(CSRA1_ReferenceWindow * self,ctx_t ctx)287 INSDC_read_filter CSRA1_ReferenceWindowGetReadFilter( CSRA1_ReferenceWindow* self, ctx_t ctx )
288 {
289 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
290
291 TRY ( NGS_Alignment* ref = GetAlignment ( self, ctx ) )
292 {
293 return NGS_AlignmentGetReadFilter ( ref, ctx );
294 }
295 return 0;
296 }
297
298 static
CSRA1_ReferenceWindowGetReferenceBases(CSRA1_ReferenceWindow * self,ctx_t ctx)299 struct NGS_String* CSRA1_ReferenceWindowGetReferenceBases( CSRA1_ReferenceWindow* self, ctx_t ctx )
300 {
301 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
302
303 TRY ( NGS_Alignment* ref = GetAlignment ( self, ctx ) )
304 {
305 return NGS_AlignmentGetReferenceBases ( ref, ctx );
306 }
307 return NULL;
308 }
309
310 static
CSRA1_ReferenceWindowGetReadGroup(CSRA1_ReferenceWindow * self,ctx_t ctx)311 struct NGS_String* CSRA1_ReferenceWindowGetReadGroup( CSRA1_ReferenceWindow* self, ctx_t ctx )
312 {
313 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
314
315 TRY ( NGS_Alignment* ref = GetAlignment ( self, ctx ) )
316 {
317 return NGS_AlignmentGetReadGroup ( ref, ctx );
318 }
319 return NULL;
320 }
321
322 static
CSRA1_ReferenceWindowGetReadId(CSRA1_ReferenceWindow * self,ctx_t ctx)323 NGS_String * CSRA1_ReferenceWindowGetReadId( CSRA1_ReferenceWindow* self, ctx_t ctx )
324 {
325 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
326
327 TRY ( NGS_Alignment* ref = GetAlignment ( self, ctx ) )
328 {
329 return NGS_AlignmentGetReadId ( ref, ctx );
330 }
331 return NULL;
332 }
333
334 static
CSRA1_ReferenceWindowGetClippedFragmentBases(CSRA1_ReferenceWindow * self,ctx_t ctx)335 struct NGS_String* CSRA1_ReferenceWindowGetClippedFragmentBases( CSRA1_ReferenceWindow* self, ctx_t ctx )
336 {
337 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
338
339 TRY ( NGS_Alignment* ref = GetAlignment ( self, ctx ) )
340 {
341 return NGS_AlignmentGetClippedFragmentBases ( ref, ctx );
342 }
343 return NULL;
344 }
345
346 static
CSRA1_ReferenceWindowGetClippedFragmentQualities(CSRA1_ReferenceWindow * self,ctx_t ctx)347 struct NGS_String* CSRA1_ReferenceWindowGetClippedFragmentQualities( CSRA1_ReferenceWindow* self, ctx_t ctx )
348 {
349 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
350
351 TRY ( NGS_Alignment* ref = GetAlignment ( self, ctx ) )
352 {
353 return NGS_AlignmentGetClippedFragmentQualities ( ref, ctx );
354 }
355 return NULL;
356 }
357
358 static
CSRA1_ReferenceWindowGetAlignedFragmentBases(CSRA1_ReferenceWindow * self,ctx_t ctx)359 struct NGS_String* CSRA1_ReferenceWindowGetAlignedFragmentBases( CSRA1_ReferenceWindow* self, ctx_t ctx )
360 {
361 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
362
363 TRY ( NGS_Alignment* ref = GetAlignment ( self, ctx ) )
364 {
365 return NGS_AlignmentGetAlignedFragmentBases ( ref, ctx );
366 }
367 return NULL;
368 }
369
370 static
CSRA1_ReferenceWindowIsPrimary(CSRA1_ReferenceWindow * self,ctx_t ctx)371 bool CSRA1_ReferenceWindowIsPrimary( CSRA1_ReferenceWindow* self, ctx_t ctx )
372 {
373 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
374
375 TRY ( NGS_Alignment* ref = GetAlignment ( self, ctx ) )
376 {
377 return NGS_AlignmentIsPrimary ( ref, ctx );
378 }
379 return false;
380 }
381
382 static
CSRA1_ReferenceWindowGetAlignmentPosition(CSRA1_ReferenceWindow * self,ctx_t ctx)383 int64_t CSRA1_ReferenceWindowGetAlignmentPosition( CSRA1_ReferenceWindow* self, ctx_t ctx )
384 {
385 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
386
387 TRY ( NGS_Alignment* ref = GetAlignment ( self, ctx ) )
388 {
389 return NGS_AlignmentGetAlignmentPosition ( ref, ctx );
390 }
391 return 0;
392 }
393
394 static
CSRA1_ReferenceWindowGetReferencePositionProjectionRange(CSRA1_ReferenceWindow * self,ctx_t ctx,int64_t ref_pos)395 uint64_t CSRA1_ReferenceWindowGetReferencePositionProjectionRange( CSRA1_ReferenceWindow* self, ctx_t ctx, int64_t ref_pos )
396 {
397 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
398
399 TRY ( NGS_Alignment* ref = GetAlignment ( self, ctx ) )
400 {
401 return NGS_AlignmentGetReferencePositionProjectionRange ( ref, ctx, ref_pos );
402 }
403 return 0;
404 }
405
406 static
CSRA1_ReferenceWindowGetAlignmentLength(CSRA1_ReferenceWindow * self,ctx_t ctx)407 uint64_t CSRA1_ReferenceWindowGetAlignmentLength( CSRA1_ReferenceWindow* self, ctx_t ctx )
408 {
409 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
410
411 TRY ( NGS_Alignment* ref = GetAlignment ( self, ctx ) )
412 {
413 return NGS_AlignmentGetAlignmentLength ( ref, ctx );
414 }
415 return 0;
416 }
417
418 static
CSRA1_ReferenceWindowGetIsReversedOrientation(CSRA1_ReferenceWindow * self,ctx_t ctx)419 bool CSRA1_ReferenceWindowGetIsReversedOrientation( CSRA1_ReferenceWindow* self, ctx_t ctx )
420 {
421 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
422
423 TRY ( NGS_Alignment* ref = GetAlignment ( self, ctx ) )
424 {
425 return NGS_AlignmentGetIsReversedOrientation ( ref, ctx );
426 }
427 return false;
428 }
429
430 static
CSRA1_ReferenceWindowGetSoftClip(CSRA1_ReferenceWindow * self,ctx_t ctx,bool left)431 int CSRA1_ReferenceWindowGetSoftClip( CSRA1_ReferenceWindow* self, ctx_t ctx, bool left )
432 {
433 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
434
435 TRY ( NGS_Alignment* ref = GetAlignment ( self, ctx ) )
436 {
437 return NGS_AlignmentGetSoftClip ( ref, ctx, left );
438 }
439 return 0;
440 }
441
442 static
CSRA1_ReferenceWindowGetTemplateLength(CSRA1_ReferenceWindow * self,ctx_t ctx)443 uint64_t CSRA1_ReferenceWindowGetTemplateLength( CSRA1_ReferenceWindow* self, ctx_t ctx )
444 {
445 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
446
447 TRY ( NGS_Alignment* ref = GetAlignment ( self, ctx ) )
448 {
449 return NGS_AlignmentGetTemplateLength ( ref, ctx );
450 }
451 return 0;
452 }
453
454 static
CSRA1_ReferenceWindowGetShortCigar(CSRA1_ReferenceWindow * self,ctx_t ctx,bool clipped)455 struct NGS_String* CSRA1_ReferenceWindowGetShortCigar( CSRA1_ReferenceWindow* self, ctx_t ctx, bool clipped )
456 {
457 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
458
459 TRY ( NGS_Alignment* ref = GetAlignment ( self, ctx ) )
460 {
461 return NGS_AlignmentGetShortCigar ( ref, ctx, clipped );
462 }
463 return NULL;
464 }
465
466 static
CSRA1_ReferenceWindowGetLongCigar(CSRA1_ReferenceWindow * self,ctx_t ctx,bool clipped)467 struct NGS_String* CSRA1_ReferenceWindowGetLongCigar( CSRA1_ReferenceWindow* self, ctx_t ctx, bool clipped )
468 {
469 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
470
471 TRY ( NGS_Alignment* ref = GetAlignment ( self, ctx ) )
472 {
473 return NGS_AlignmentGetLongCigar ( ref, ctx, clipped );
474 }
475 return NULL;
476 }
477
478 static
CSRA1_ReferenceWindowGetRNAOrientation(CSRA1_ReferenceWindow * self,ctx_t ctx)479 char CSRA1_ReferenceWindowGetRNAOrientation( CSRA1_ReferenceWindow* self, ctx_t ctx )
480 {
481 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
482
483 TRY ( NGS_Alignment* ref = GetAlignment ( self, ctx ) )
484 {
485 return NGS_AlignmentGetRNAOrientation ( ref, ctx );
486 }
487 return false;
488 }
489
490 static
CSRA1_ReferenceWindowHasMate(CSRA1_ReferenceWindow * self,ctx_t ctx)491 bool CSRA1_ReferenceWindowHasMate( CSRA1_ReferenceWindow* self, ctx_t ctx )
492 {
493 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
494
495 TRY ( NGS_Alignment* ref = GetAlignment ( self, ctx ) )
496 {
497 return NGS_AlignmentHasMate ( ref, ctx );
498 }
499 CLEAR(); /* we do not want HasMate to ever throw, as a favor to C++/Java front ends */
500 return false;
501 }
502
503 static
CSRA1_ReferenceWindowGetMateAlignmentId(CSRA1_ReferenceWindow * self,ctx_t ctx)504 struct NGS_String* CSRA1_ReferenceWindowGetMateAlignmentId( CSRA1_ReferenceWindow* self, ctx_t ctx )
505 {
506 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
507
508 TRY ( NGS_Alignment* ref = GetAlignment ( self, ctx ) )
509 {
510 return NGS_AlignmentGetMateAlignmentId ( ref, ctx );
511 }
512 return 0;
513 }
514
515 static
CSRA1_ReferenceWindowGetMateAlignment(CSRA1_ReferenceWindow * self,ctx_t ctx)516 CSRA1_ReferenceWindow* CSRA1_ReferenceWindowGetMateAlignment( CSRA1_ReferenceWindow* self, ctx_t ctx )
517 {
518 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
519
520 TRY ( NGS_Alignment* ref = GetAlignment ( self, ctx ) )
521 {
522 return ( CSRA1_ReferenceWindow * ) NGS_AlignmentGetMateAlignment( ref, ctx );
523 }
524 return NULL;
525 }
526
527 static
CSRA1_ReferenceWindowGetMateReferenceSpec(CSRA1_ReferenceWindow * self,ctx_t ctx)528 struct NGS_String* CSRA1_ReferenceWindowGetMateReferenceSpec( CSRA1_ReferenceWindow* self, ctx_t ctx )
529 {
530 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
531
532 TRY ( NGS_Alignment* ref = GetAlignment ( self, ctx ) )
533 {
534 return NGS_AlignmentGetMateReferenceSpec ( ref, ctx );
535 }
536 return NULL;
537 }
538
539 static
CSRA1_ReferenceWindowGetMateIsReversedOrientation(CSRA1_ReferenceWindow * self,ctx_t ctx)540 bool CSRA1_ReferenceWindowGetMateIsReversedOrientation( CSRA1_ReferenceWindow* self, ctx_t ctx )
541 {
542 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
543
544 TRY ( NGS_Alignment* ref = GetAlignment ( self, ctx ) )
545 {
546 return NGS_AlignmentGetMateIsReversedOrientation ( ref, ctx );
547 }
548 return false;
549 }
550
551 static
CSRA1_ReferenceWindowIsFirst(CSRA1_ReferenceWindow * self,ctx_t ctx)552 bool CSRA1_ReferenceWindowIsFirst( CSRA1_ReferenceWindow* self, ctx_t ctx )
553 {
554 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
555
556 TRY ( NGS_Alignment* ref = GetAlignment ( self, ctx ) )
557 {
558 return NGS_AlignmentIsFirst ( ref, ctx );
559 }
560 return false;
561 }
562
563 /*--------------------------------------------------------------------------
564 * Iterator
565 */
566 static
AlignmentSort(const void * p_a,const void * p_b,void * data)567 int64_t AlignmentSort ( const void * p_a, const void * p_b, void *data )
568 {
569 const struct AlignmentInfo* a = ( const struct AlignmentInfo * ) p_a;
570 const struct AlignmentInfo* b = ( const struct AlignmentInfo * ) p_b;
571
572 if ( a -> pos < b -> pos )
573 return -1;
574 else if ( a -> pos > b -> pos )
575 return 1;
576
577 /* cannot use uint64_t - uint64_t because of possible overflow */
578 if ( a -> len < b -> len ) return 1;
579 if ( a -> len > b -> len ) return -1;
580
581 if ( a -> cat != b -> cat )
582 return (int64_t) a -> cat - (int64_t) b -> cat;
583
584 /* sort by mapq in reverse order */
585 if ( a -> mapq != b -> mapq )
586 return (int64_t) b -> mapq - (int64_t) a -> mapq;
587
588 /* use row id as the last resort, to make sorting more predictable */
589 return a -> id < b -> id ? -1 : a -> id > b -> id;
590 }
591
592 static
593 bool
ApplyFilters(CSRA1_ReferenceWindow * self,ctx_t ctx,NGS_Alignment * p_al,uint32_t * p_map_qual)594 ApplyFilters( CSRA1_ReferenceWindow* self, ctx_t ctx, NGS_Alignment* p_al, uint32_t* p_map_qual )
595 {
596 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
597
598 bool have_map_qual = false;
599 uint32_t map_qual;
600
601 /* test for additional filtering */
602 if ( ( self -> filters & NGS_AlignmentFilterBits_prop_mask ) != 0 )
603 {
604 TRY ( INSDC_read_filter read_filter = NGS_AlignmentGetReadFilter ( p_al, ctx ) )
605 {
606 switch ( read_filter )
607 {
608 case READ_FILTER_PASS:
609 if ( CSRA1_ReferenceWindowFilterMapQual ( self ) )
610 {
611 TRY ( map_qual = NGS_AlignmentGetMappingQuality ( p_al, ctx ) )
612 {
613 have_map_qual = true;
614 if ( CSRA1_ReferenceWindowFilterMinMapQual ( self ) )
615 {
616 /* map_qual must be >= filter level */
617 if ( map_qual < self -> map_qual )
618 return false;
619 }
620 else
621 {
622 /* map qual must be <= filter level */
623 if ( map_qual > self -> map_qual )
624 return false;
625 }
626 }
627 }
628 break;
629 case READ_FILTER_REJECT:
630 if ( CSRA1_ReferenceWindowFilterDropBad ( self ) )
631 return false;
632 break;
633 case READ_FILTER_CRITERIA:
634 if ( CSRA1_ReferenceWindowFilterDropDups ( self ) )
635 return false;
636 break;
637 case READ_FILTER_REDACTED:
638 return false;
639 break;
640 }
641 }
642 }
643 *p_map_qual = have_map_qual ? map_qual : NGS_AlignmentGetMappingQuality ( p_al, ctx );
644 return true;
645 }
646
647 static
LoadAlignmentInfo(CSRA1_ReferenceWindow * self,ctx_t ctx,size_t * idx,int64_t id,bool primary,int64_t offset,uint64_t size,bool wraparoundOnly)648 void LoadAlignmentInfo ( CSRA1_ReferenceWindow* self, ctx_t ctx, size_t* idx, int64_t id, bool primary, int64_t offset, uint64_t size, bool wraparoundOnly )
649 {
650 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
651
652 TRY ( NGS_Alignment* al = CSRA1_AlignmentMake ( ctx,
653 ( struct CSRA1_ReadCollection * ) self -> coll,
654 id,
655 "", 0,
656 primary,
657 self -> id_offset ) )
658 {
659 int64_t pos = NGS_AlignmentGetAlignmentPosition ( al, ctx );
660 int64_t len = (int64_t) NGS_AlignmentGetAlignmentLength ( al, ctx );
661
662 if ( ! CSRA1_ReferenceWindowFilterStartWithinWindow ( self ) || pos >= offset )
663 {
664 bool overlaps = true;
665
666 if ( size > 0 )
667 { /* a slice*/
668 int64_t end_slice = offset + (int64_t)size;
669 if ( end_slice > (int64_t) self -> ref_length )
670 {
671 end_slice = self -> ref_length;
672 }
673 if ( ! CSRA1_ReferenceWindowFilterStartWithinWindow ( self ) &&
674 pos + len >= (int64_t) self -> ref_length )
675 { /* account for possible wraparounds on a circular reference */
676 if ( wraparoundOnly )
677 {
678 if ( end_slice == self -> ref_length )
679 { /* both slice and alignment wrap around */
680 overlaps = true;
681 }
682 else
683 { /* alignment wraps around and overlaps with the slice */
684 overlaps = pos + len > self -> ref_length + offset;
685 }
686 /*printf("LoadAlignmentInfo(offset=%li, size=%lu) pos=%li len=%li overlaps=%i\n", offset, size, pos, len, (int)overlaps);*/
687 }
688 else
689 { /* ignore the wraparound */
690 overlaps = false;
691 }
692 }
693 else if ( wraparoundOnly )
694 {
695 overlaps = false;
696 }
697 else
698 {
699 overlaps = pos < end_slice && ( pos + len > offset );
700 }
701 }
702 else if ( ! CSRA1_ReferenceWindowFilterStartWithinWindow ( self ) && wraparoundOnly )
703 {
704 if ( pos + len < (int64_t) self -> ref_length )
705 {
706 overlaps = false;
707 }
708 }
709
710 if ( overlaps )
711 {
712 uint32_t map_qual;
713 if ( ApplyFilters ( self, ctx, al, &map_qual ) )
714 { /* accept record */
715 /*printf("pos=%li, len=%li, end=%li, q=%i, id=%li, wrap=%i\n", pos, len, pos+len, NGS_AlignmentGetMappingQuality ( al, ctx ), id, wraparoundOnly);*/
716 self -> align_info [ *idx ] . id = id;
717 self -> align_info [ *idx ] . pos = pos;
718 self -> align_info [ *idx ] . len = len;
719 self -> align_info [ *idx ] . cat = primary ? Primary : Secondary;
720 self -> align_info [ *idx ] . mapq = map_qual;
721 ++ ( * idx );
722 }
723 }
724 }
725
726 NGS_AlignmentRelease ( al, ctx );
727 }
728 CATCH ( xcSecondaryAlignmentMissingPrimary )
729 {
730 CLEAR ();
731 }
732 }
733
734 static
LoadAlignmentIndex(CSRA1_ReferenceWindow * self,ctx_t ctx,int64_t row_id,uint32_t id_col_idx,const int64_t ** p_base,uint32_t * p_length)735 void LoadAlignmentIndex ( CSRA1_ReferenceWindow* self, ctx_t ctx, int64_t row_id, uint32_t id_col_idx, const int64_t** p_base, uint32_t* p_length )
736 {
737 const void * base;
738 uint32_t elem_bits, boff, row_len;
739 TRY ( NGS_CursorCellDataDirect ( self -> reference_curs,
740 ctx,
741 row_id,
742 id_col_idx,
743 & elem_bits,
744 & base,
745 & boff,
746 & row_len ) )
747 {
748 assert ( elem_bits == 64 );
749 assert ( boff == 0 );
750 *p_base = ( const int64_t* ) base;
751 *p_length = row_len;
752 }
753 }
754
755 static
AlignmentSortCircular(const void * p_a,const void * p_b,void * data)756 int64_t AlignmentSortCircular ( const void * p_a, const void * p_b, void *data )
757 {
758 const struct AlignmentInfo* a = ( const struct AlignmentInfo * ) p_a;
759 const struct AlignmentInfo* b = ( const struct AlignmentInfo * ) p_b;
760
761 uint64_t total = *(uint64_t*)data;
762 int64_t a_start = a -> pos;
763 int64_t b_start = b -> pos;
764 if ( ( (uint64_t)a-> pos ) + a -> len > total )
765 {
766 a_start -= total;
767 }
768 if ( ( (uint64_t)b -> pos ) + b -> len > total )
769 {
770 b_start -= total;
771 }
772
773 if ( a_start < b_start )
774 return -1;
775 else if ( a_start > b_start )
776 return 1;
777
778 /* cannot use uint64_t - uint64_t because of possible overflow */
779 if ( a -> len < b -> len ) return 1;
780 if ( a -> len > b -> len ) return -1;
781
782 if ( a -> cat != b -> cat )
783 return (int64_t) a -> cat - (int64_t) b -> cat;
784
785 /* sort by mapq in reverse order */
786 if ( a -> mapq != b -> mapq )
787 return (int64_t) b -> mapq - (int64_t) a -> mapq;
788
789 /* use row id as the last resort, to make sorting more predictable */
790 return a -> id < b -> id ? -1 : a -> id > b -> id;
791 }
792
793 static
LoadAlignments(CSRA1_ReferenceWindow * self,ctx_t ctx,int64_t chunk_row_id,int64_t offset,uint64_t size,bool wraparounds)794 void LoadAlignments ( CSRA1_ReferenceWindow* self, ctx_t ctx, int64_t chunk_row_id, int64_t offset, uint64_t size, bool wraparounds )
795 { /* append alignments for the specified chunk to self -> align_info */
796 const int64_t* primary_idx = NULL;
797 uint32_t primary_idx_end = 0;
798 const int64_t* secondary_idx = NULL;
799 uint32_t secondary_idx_end = 0;
800 uint32_t total_added = 0;
801
802 if ( self -> primary && self -> ref_primary_begin <= chunk_row_id )
803 {
804 ON_FAIL ( LoadAlignmentIndex ( self, ctx, chunk_row_id, reference_PRIMARY_ALIGNMENT_IDS, & primary_idx, & primary_idx_end ) )
805 return;
806 }
807
808 if ( self -> secondary && self -> ref_secondary_begin <= chunk_row_id )
809 {
810 ON_FAIL ( LoadAlignmentIndex ( self, ctx, chunk_row_id, reference_SECONDARY_ALIGNMENT_IDS, & secondary_idx, & secondary_idx_end ) )
811 {
812 if ( GetRCObject ( ctx -> rc ) == ( enum RCObject )rcColumn && GetRCState ( ctx -> rc ) == rcNotFound )
813 { /* SECONDARY_ALIGNMENT_IDS is missing; no problem */
814 self -> secondary = false; /* do not try anymore */
815 CLEAR();
816 }
817 else
818 {
819 return;
820 }
821 }
822 }
823
824 total_added = primary_idx_end + secondary_idx_end;
825 if ( total_added > 0 )
826 {
827 self -> align_info = realloc ( self -> align_info, ( self -> align_info_total + total_added ) * sizeof ( * self -> align_info ) );
828 if ( self -> align_info == NULL )
829 {
830 SYSTEM_ERROR ( xcNoMemory, "allocating CSRA1_ReferenceWindow chunk" );
831 return;
832 }
833 else
834 {
835 uint32_t i;
836 for ( i = 0; i < primary_idx_end; ++i )
837 {
838 ON_FAIL ( LoadAlignmentInfo( self, ctx, & self -> align_info_total, primary_idx [ i ], true, offset, size, wraparounds ) )
839 return;
840 }
841 for ( i = 0; i < secondary_idx_end; ++i )
842 {
843 ON_FAIL ( LoadAlignmentInfo( self, ctx, & self -> align_info_total, secondary_idx [ i ] + self -> id_offset, false, offset, size, wraparounds ) )
844 return;
845 }
846 }
847 }
848 /* now self -> align_info_total is the actual number of alignments currently loaded into self->align_info (can be less than allocated for) */
849 }
850
851 static
LoadFirstCircular(CSRA1_ReferenceWindow * self,ctx_t ctx)852 bool LoadFirstCircular ( CSRA1_ReferenceWindow* self, ctx_t ctx )
853 { /* load the first chunk of a circular reference (other chunks will go through LoadNextChunk) */
854 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
855 int64_t last_chunk = self -> ref_end - 1;
856 assert ( self );
857
858 self -> align_info_total = 0;
859
860 /* for windows on circular references, self->ref_begin and and self->ref_end - 1
861 are the rowId's of the first and last chunk of the reference, regardless of slicing */
862 if ( ! CSRA1_ReferenceWindowFilterNoWraparound ( self ) && self -> ref_begin < last_chunk )
863 { /* load the last chunk of the reference, to cover possible overlaps into the first chunk */
864 if ( self -> slice_size == 0 )
865 { /* loading possible overlaps with the first chunk */
866 ON_FAIL ( LoadAlignments ( self, ctx, last_chunk, 0, self -> chunk_size, true ) )
867 return false;
868 }
869 else if ( self -> slice_offset < self -> chunk_size )
870 { /* the slice starts in the first chunk; load alignments wrapped around from the last chunk */
871 ON_FAIL ( LoadAlignments ( self, ctx, last_chunk, self -> slice_offset, self -> chunk_size - self -> slice_offset, true ) )
872 return false;
873 }
874 else if ( self -> slice_offset + self -> slice_size > self -> ref_length )
875 { /* the slice starts in the last and wraps around to the first chunk; load alignments wrapped around into the first chunk */
876 ON_FAIL ( LoadAlignments ( self, ctx, last_chunk, self -> slice_offset, self -> slice_size, true ) )
877 return false;
878 }
879 /* target slice is not in the first chunk, no need to look for overlaps from the end of the reference */
880 }
881
882 ON_FAIL ( LoadAlignments ( self, ctx, self -> ref_begin, self -> slice_offset, self -> slice_size, false ) )
883 return false;
884
885 if ( self -> align_info_total > 0 )
886 {
887 ksort ( self -> align_info, self -> align_info_total, sizeof ( * self -> align_info ), AlignmentSortCircular, & self -> ref_length );
888 self -> align_info_cur = 0;
889 return true;
890 }
891 return false;
892 }
893
894 static
LoadNextChunk(CSRA1_ReferenceWindow * self,ctx_t ctx)895 bool LoadNextChunk ( CSRA1_ReferenceWindow* self, ctx_t ctx )
896 {
897 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
898
899 assert ( self );
900
901 self -> align_info_total = 0;
902 while ( self -> ref_begin < self -> ref_end )
903 {
904 ON_FAIL ( LoadAlignments ( self, ctx, self -> ref_begin, self -> slice_offset, self -> slice_size, false ) )
905 return false;
906
907 if ( self -> align_info_total > 0 )
908 {
909 ksort ( self -> align_info, self -> align_info_total, sizeof ( * self -> align_info ), AlignmentSort, NULL );
910 self -> align_info_cur = 0;
911
912 return true;
913 }
914
915 /* this chunk had no alignments - move to the next one */
916 ++ self -> ref_begin;
917 }
918
919 return false;
920 }
921
922 static
CSRA1_ReferenceWindowIteratorNext(CSRA1_ReferenceWindow * self,ctx_t ctx)923 bool CSRA1_ReferenceWindowIteratorNext ( CSRA1_ReferenceWindow* self, ctx_t ctx )
924 {
925 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcReading );
926
927 if ( ! self -> seen_first )
928 { /* first call - position on the first alignment */
929 self -> seen_first = true;
930 if ( self -> circular )
931 {
932 return LoadFirstCircular ( self, ctx );
933 }
934 }
935 else
936 {
937 /* clear cached alignment*/
938 NGS_AlignmentRelease ( self -> cur_align, ctx );
939 self -> cur_align = NULL;
940
941 ++ self -> align_info_cur;
942 if ( self -> align_info_cur < self -> align_info_total )
943 return true;
944
945 ++ self -> ref_begin;
946 }
947
948 return LoadNextChunk ( self, ctx );
949 }
950
951 static NGS_Alignment_vt CSRA1_ReferenceWindow_vt_inst =
952 {
953 {
954 { /* NGS_Refcount */
955 CSRA1_ReferenceWindowWhack
956 },
957
958 /* NGS_Fragment */
959 CSRA1_FragmentGetId,
960 CSRA1_FragmentGetSequence,
961 CSRA1_FragmentGetQualities,
962 CSRA1_FragmentIsPaired,
963 CSRA1_FragmentIsAligned,
964 CSRA1_FragmentNext
965 },
966
967 CSRA1_ReferenceWindowGetAlignmentId,
968 CSRA1_ReferenceWindowGetReferenceSpec,
969 CSRA1_ReferenceWindowGetMappingQuality,
970 CSRA1_ReferenceWindowGetReadFilter,
971 CSRA1_ReferenceWindowGetReferenceBases,
972 CSRA1_ReferenceWindowGetReadGroup,
973 CSRA1_ReferenceWindowGetReadId,
974 CSRA1_ReferenceWindowGetClippedFragmentBases,
975 CSRA1_ReferenceWindowGetClippedFragmentQualities,
976 CSRA1_ReferenceWindowGetAlignedFragmentBases,
977 CSRA1_ReferenceWindowIsPrimary,
978 CSRA1_ReferenceWindowGetAlignmentPosition,
979 CSRA1_ReferenceWindowGetReferencePositionProjectionRange,
980 CSRA1_ReferenceWindowGetAlignmentLength,
981 CSRA1_ReferenceWindowGetIsReversedOrientation,
982 CSRA1_ReferenceWindowGetSoftClip,
983 CSRA1_ReferenceWindowGetTemplateLength,
984 CSRA1_ReferenceWindowGetShortCigar,
985 CSRA1_ReferenceWindowGetLongCigar,
986 CSRA1_ReferenceWindowGetRNAOrientation,
987 CSRA1_ReferenceWindowHasMate,
988 CSRA1_ReferenceWindowGetMateAlignmentId,
989 CSRA1_ReferenceWindowGetMateAlignment,
990 CSRA1_ReferenceWindowGetMateReferenceSpec,
991 CSRA1_ReferenceWindowGetMateIsReversedOrientation,
992 CSRA1_ReferenceWindowIsFirst,
993
994 /* Iterator */
995 CSRA1_ReferenceWindowIteratorNext
996 };
997
998 static
CSRA1_ReferenceWindowInit(CSRA1_ReferenceWindow * ref,ctx_t ctx,NGS_ReadCollection * coll,const struct NGS_Cursor * curs,bool circular,uint64_t ref_length,uint32_t chunk_size,int64_t primary_begin_row,int64_t secondary_begin_row,int64_t end_row,uint64_t offset,uint64_t size,bool primary,bool secondary,uint32_t filters,int32_t map_qual,uint64_t id_offset)999 void CSRA1_ReferenceWindowInit ( CSRA1_ReferenceWindow * ref,
1000 ctx_t ctx,
1001 NGS_ReadCollection * coll,
1002 const struct NGS_Cursor* curs,
1003 bool circular,
1004 uint64_t ref_length,
1005 uint32_t chunk_size,
1006 int64_t primary_begin_row,
1007 int64_t secondary_begin_row,
1008 int64_t end_row,
1009 uint64_t offset,
1010 uint64_t size, /* 0 - all remaining */
1011 bool primary,
1012 bool secondary,
1013 uint32_t filters,
1014 int32_t map_qual,
1015 uint64_t id_offset )
1016 {
1017 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcConstructing );
1018
1019 TRY ( NGS_AlignmentInit ( ctx, ref, & CSRA1_ReferenceWindow_vt_inst, "CSRA1_ReferenceWindow", "" ) )
1020 {
1021 TRY ( ref -> coll = (NGS_ReadCollection *) NGS_RefcountDuplicate ( & coll -> dad, ctx ) )
1022 {
1023 ref -> reference_curs = NGS_CursorDuplicate ( curs, ctx );
1024 ref -> circular = circular;
1025 ref -> primary = primary;
1026 ref -> secondary = secondary;
1027 /* see comment above about inverting polarity of the "pass" bits to create "drop" bits */
1028 ref -> filters = filters ^ ( NGS_AlignmentFilterBits_pass_bad | NGS_AlignmentFilterBits_pass_dups );
1029 ref -> map_qual = map_qual;
1030 ref -> chunk_size = chunk_size;
1031 ref -> ref_length = ref_length;
1032 ref -> id_offset = id_offset;
1033 ref -> ref_begin = min (primary_begin_row, secondary_begin_row);
1034 ref -> ref_primary_begin = primary_begin_row;
1035 ref -> ref_secondary_begin = secondary_begin_row;
1036 ref -> ref_end = end_row;
1037 ref -> slice_offset = offset;
1038 ref -> slice_size = size;
1039 }
1040 }
1041 }
1042
1043 /* MakeCommon
1044 * makes a common alignment from VCursor
1045 */
CSRA1_ReferenceWindowMake(ctx_t ctx,struct NGS_ReadCollection * coll,const struct NGS_Cursor * curs,bool circular,uint64_t ref_length,uint32_t chunk_size,int64_t primary_begin_row,int64_t secondary_begin_row,int64_t end_row,uint64_t offset,uint64_t size,bool primary,bool secondary,uint32_t filters,int32_t map_qual,uint64_t id_offset)1046 NGS_Alignment * CSRA1_ReferenceWindowMake ( ctx_t ctx,
1047 struct NGS_ReadCollection * coll,
1048 const struct NGS_Cursor* curs,
1049 bool circular,
1050 uint64_t ref_length,
1051 uint32_t chunk_size,
1052 int64_t primary_begin_row,
1053 int64_t secondary_begin_row,
1054 int64_t end_row,
1055 uint64_t offset,
1056 uint64_t size, /* 0 - all remaining */
1057 bool primary,
1058 bool secondary,
1059 uint32_t filters,
1060 int32_t map_qual,
1061 uint64_t id_offset )
1062 {
1063 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcConstructing );
1064
1065 CSRA1_ReferenceWindow * ref;
1066
1067 assert ( coll != NULL );
1068
1069 ref = calloc ( 1, sizeof * ref );
1070 if ( ref == NULL )
1071 SYSTEM_ERROR ( xcNoMemory, "allocating CSRA1_ReferenceWindow" );
1072 else
1073 {
1074 TRY ( CSRA1_ReferenceWindowInit ( ref,
1075 ctx,
1076 coll,
1077 curs,
1078 circular,
1079 ref_length,
1080 chunk_size,
1081 primary_begin_row,
1082 secondary_begin_row,
1083 end_row,
1084 offset,
1085 size,
1086 primary,
1087 secondary,
1088 filters,
1089 map_qual,
1090 id_offset ) )
1091 {
1092 return ( NGS_Alignment * ) ref;
1093 }
1094
1095 free ( ref );
1096 }
1097
1098 return NULL;
1099 }
1100