1 /*===========================================================================
2 *
3 *                            PUBLIC DOMAIN NOTICE
4 *               National Center for Biotechnology Information
5 *
6 *  This software/database is a "United States Government Work" under the
7 *  terms of the United States Copyright Act.  It was written as part of
8 *  the author's official duties as a United States Government employee and
9 *  thus cannot be copyrighted.  This software/database is freely available
10 *  to the public for use. The National Library of Medicine and the U.S.
11 *  Government have not placed any restriction on its use or reproduction.
12 *
13 *  Although all reasonable efforts have been taken to ensure the accuracy
14 *  and reliability of the software and data, the NLM and the U.S.
15 *  Government do not and cannot warrant the performance or results that
16 *  may be obtained by using this software or data. The NLM and the U.S.
17 *  Government disclaim all warranties, express or implied, including
18 *  warranties of performance, merchantability or fitness for any particular
19 *  purpose.
20 *
21 *  Please cite the author in any work or product based on this material.
22 *
23 * ===========================================================================
24 *
25 */
26 
27 #include "SRA_ReadGroup.h"
28 
29 #include "SRA_Read.h"
30 #include "SRA_ReadGroupInfo.h"
31 #include "SRA_Statistics.h"
32 
33 #include "NGS_String.h"
34 #include "NGS_Cursor.h"
35 #include "NGS_Id.h"
36 
37 #include <kfc/ctx.h>
38 #include <kfc/rsrc.h>
39 #include <kfc/except.h>
40 #include <kfc/xc.h>
41 #include <klib/text.h>
42 #include <klib/printf.h>
43 #include <klib/refcount.h>
44 #include <vdb/cursor.h>
45 #include <vdb/schema.h>
46 #include <vdb/vdb-priv.h>
47 #include <insdc/insdc.h>
48 
49 #include <strtol.h> /* strtoi64 */
50 
51 #include <stddef.h>
52 #include <assert.h>
53 
54 #include <sysalloc.h>
55 
/*--------------------------------------------------------------------------
 * SRA_ReadGroup
 */
59 
/* SRA_ReadGroup
 *  one structure serves both as a single read group ( see SRA_ReadGroupMake )
 *  and as a read group iterator ( see SRA_ReadGroupIteratorMake );
 *  the "iterating" flag tells the two apart
 */
struct SRA_ReadGroup
{
    /* base object, set up by NGS_ReadGroupInit with this file's vtable */
    NGS_ReadGroup dad;

    /* reference obtained with NGS_StringDuplicate in SRA_ReadGroupInit */
    const NGS_String * run_name;
    /* current group name; replaced on every successful iterator step */
    const NGS_String * name; /* owns the char buffer */

    /* reference obtained with NGS_CursorDuplicate by the Make functions */
    const NGS_Cursor * curs;
    /* reference obtained with SRA_ReadGroupInfoDuplicate in SRA_ReadGroupInit */
    const SRA_ReadGroupInfo* group_info;

    /* accessors raise xcIteratorUninitialized until this is true;
       set by SRA_ReadGroupMake or by the first SRA_ReadGroupIteratorNext */
    bool seen_first;
    /* true only for objects created by SRA_ReadGroupIteratorMake */
    bool iterating;

    /* index into group_info -> groups; values >= group_info -> count
       are treated as "exhausted" by the accessors */
    uint32_t cur_group;
};
75 
/* forward declarations of the file-local implementations
   referenced by the vtable below */
static void                     SRA_ReadGroupWhack ( SRA_ReadGroup * self, ctx_t ctx );
static struct NGS_String*       SRA_ReadGroupGetName ( const SRA_ReadGroup * self, ctx_t ctx );
static struct NGS_Read*         SRA_ReadGroupGetReads ( const SRA_ReadGroup * self, ctx_t ctx, bool wants_full, bool wants_partial, bool wants_unaligned );
static struct NGS_Read*         SRA_ReadGroupGetRead ( const SRA_ReadGroup * self, ctx_t ctx, const char* readId );
static struct NGS_Statistics*   SRA_ReadGroupGetStatistics ( const SRA_ReadGroup * self, ctx_t ctx );
static bool                     SRA_ReadGroupIteratorNext ( SRA_ReadGroup * self, ctx_t ctx );

/* vtable handed to NGS_ReadGroupInit by SRA_ReadGroupInit;
   the initializer is positional, so entry order must not change */
static NGS_ReadGroup_vt NGS_ReadGroup_vt_inst =
{
    {
        /* NGS_RefCount */
        SRA_ReadGroupWhack
    },

    /* NGS_ReadGroup */
    SRA_ReadGroupGetName,
    SRA_ReadGroupGetReads,
    SRA_ReadGroupGetRead,
    SRA_ReadGroupGetStatistics,
    SRA_ReadGroupIteratorNext,
};
97 
98 /* Init
99  */
100 static
SRA_ReadGroupInit(ctx_t ctx,SRA_ReadGroup * self,const char * clsname,const char * instname,const NGS_String * run_name,const char * group_name,size_t group_name_size,const struct SRA_ReadGroupInfo * group_info)101 void SRA_ReadGroupInit ( ctx_t ctx,
102                          SRA_ReadGroup * self,
103                          const char *clsname,
104                          const char *instname,
105                          const NGS_String* run_name,
106                          const char* group_name, size_t group_name_size,
107                          const struct SRA_ReadGroupInfo* group_info )
108 {
109     FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcConstructing );
110 
111     if ( self == NULL )
112         INTERNAL_ERROR ( xcParamNull, "bad object reference" );
113     else
114     {
115         TRY ( NGS_ReadGroupInit ( ctx, & self -> dad, & NGS_ReadGroup_vt_inst, clsname, instname ) )
116         {
117             TRY ( self -> run_name = NGS_StringDuplicate ( run_name, ctx ) )
118             {
119                 TRY ( self -> name = NGS_StringMakeCopy ( ctx, group_name, group_name_size ) )
120                 {
121                     self -> group_info = SRA_ReadGroupInfoDuplicate ( group_info, ctx );
122                 }
123             }
124         }
125     }
126 }
127 
/* Whack
 *  release every reference held by the object
 *  installed as the NGS_RefCount entry of the vtable, and also called
 *  directly by the Make functions when construction fails part way
 *  does not free "self" itself
 */
static
void SRA_ReadGroupWhack ( SRA_ReadGroup * self, ctx_t ctx )
{
    NGS_StringRelease ( self -> run_name, ctx );
    NGS_StringRelease ( self -> name, ctx );
    /* may still be NULL when called from a Make function whose
       NGS_CursorDuplicate failed */
    NGS_CursorRelease ( self -> curs, ctx );
    SRA_ReadGroupInfoRelease ( self -> group_info, ctx );
}
138 
/* GetName
 *  return a new reference to the name of the current read group
 *
 *  returns NULL after raising
 *    xcIteratorUninitialized - the object has not been positioned yet
 *    xcCursorExhausted       - the current group index is past the last group
 */
struct NGS_String* SRA_ReadGroupGetName ( const SRA_ReadGroup * self, ctx_t ctx )
{
    FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcAccessing );

    assert ( self != NULL );

    /* not positioned on any group yet */
    if ( ! self -> seen_first )
    {
        USER_ERROR ( xcIteratorUninitialized, "ReadGroup accessed before a call to ReadIteratorNext()" );
        return NULL;
    }

    /* positioned past the last group */
    if ( self -> cur_group >= self -> group_info -> count )
    {
        USER_ERROR ( xcCursorExhausted, "No more rows available" );
        return NULL;
    }

    /* the caller receives its own reference */
    return NGS_StringDuplicate ( self -> name, ctx );
}
158 
/* GetReads
 *  create a read iterator restricted to the row range of the current group
 *
 *  "wants_full", "wants_partial", "wants_unaligned" [ IN ] - category
 *   filters, passed through unchanged to SRA_ReadIteratorMakeReadGroup
 *
 *  returns NULL after raising
 *    xcIteratorUninitialized - the object has not been positioned yet
 *    xcCursorExhausted       - the current group index is past the last group
 */
struct NGS_Read* SRA_ReadGroupGetReads ( const SRA_ReadGroup * self, ctx_t ctx, bool wants_full, bool wants_partial, bool wants_unaligned )
{
    FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcAccessing );

    uint64_t first_row;
    uint64_t row_span;

    if ( ! self -> seen_first )
    {
        USER_ERROR ( xcIteratorUninitialized, "ReadGroup accessed before a call to ReadIteratorNext()" );
        return NULL;
    }

    if ( self -> cur_group >= self -> group_info -> count )
    {
        USER_ERROR ( xcCursorExhausted, "No more rows available" );
        return NULL;
    }

    first_row = self -> group_info -> groups [ self -> cur_group ] . min_row;

    /* NOTE(review): if max_row is an inclusive bound this span leaves out
       the last row of the group - confirm against SRA_ReadGroupInfo */
    row_span = self -> group_info -> groups [ self -> cur_group ] . max_row - first_row;

    return SRA_ReadIteratorMakeReadGroup ( ctx,
                                           self -> curs,
                                           self -> run_name,
                                           self -> name,
                                           first_row,
                                           row_span,
                                           wants_full,
                                           wants_partial,
                                           wants_unaligned );
}
188 
/* GetRead
 *  look up a single read by its id string and return it only if it belongs
 *  both to this object's run and to its current read group
 *
 *  "readIdStr" [ IN ] - NUL-terminated read id, parsed with NGS_IdParse
 *
 *  returns the read on success; the caller owns the returned reference
 *  returns NULL after raising
 *    xcIteratorUninitialized - the object has not been positioned yet
 *    xcCursorExhausted       - the current group index is past the last group
 *    xcArcIncorrect          - the id names a different run
 *    xcWrongReadGroup        - the read belongs to a different read group
 *  or when NGS_IdParse, SRA_ReadMake or NGS_ReadGetReadGroup fail
 */
struct NGS_Read* SRA_ReadGroupGetRead ( const SRA_ReadGroup * self, ctx_t ctx, const char* readIdStr )
{
    FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcAccessing );

    if ( ! self -> seen_first )
    {
        USER_ERROR ( xcIteratorUninitialized, "ReadGroup accessed before a call to ReadIteratorNext()" );
        return NULL;
    }
    else if ( self -> cur_group >= self -> group_info -> count )
    {
        USER_ERROR ( xcCursorExhausted, "No more rows available" );
        return NULL;
    }

    {
        TRY ( struct NGS_Id id = NGS_IdParse ( readIdStr, string_size ( readIdStr ), ctx ) )
        {
            /* the run named in the id must be the run this group belongs to */
            if ( string_cmp ( NGS_StringData ( self -> run_name, ctx ),
                NGS_StringSize ( self -> run_name, ctx ),
                id . run . addr,
                id . run . size,
                id . run . len ) != 0 )
            {
                INTERNAL_ERROR ( xcArcIncorrect,
                    " expected '%.*s', actual '%.*s'",
                    NGS_StringSize ( self -> run_name, ctx ),
                    NGS_StringData ( self -> run_name, ctx ),
                    id . run . size,
                    id . run . addr );
            }
            else
            {
                /* make sure the requested read is from this read group */
                NGS_Read* ret;
                TRY ( ret = SRA_ReadMake ( ctx, self -> curs, id . rowId, self -> run_name ) )
                {
                    TRY ( const NGS_String* readGroup = NGS_ReadGetReadGroup ( ret, ctx ) )
                    {
                        if ( string_cmp ( NGS_StringData ( self -> name, ctx ),
                            NGS_StringSize ( self -> name, ctx ),
                            NGS_StringData ( readGroup, ctx ),
                            NGS_StringSize ( readGroup, ctx ),
                            NGS_StringSize ( readGroup, ctx ) ) == 0 )
                        {
                            NGS_StringRelease ( readGroup, ctx );
                            return ret;
                        }

                        /* both placeholders must be '%.*s': each consumes a
                           ( size, data ) pair from the argument list */
                        INTERNAL_ERROR ( xcWrongReadGroup,
                            "Requested read is from a different read group (expected '%.*s', actual '%.*s')",
                            NGS_StringSize ( self -> name, ctx ),
                            NGS_StringData ( self -> name, ctx ),
                            NGS_StringSize ( readGroup, ctx ),
                            NGS_StringData ( readGroup, ctx ) );
                        NGS_StringRelease ( readGroup, ctx );
                    }

                    /* the read is not returned on any path that gets here */
                    NGS_ReadRelease ( ret, ctx );
                }
            }
        }
    }
    return NULL;
}
252 
/* GetStatistics
 *  build a statistics object for the current read group holding
 *  BASE_COUNT, BIO_BASE_COUNT, SPOT_COUNT, SPOT_MAX and SPOT_MIN
 *
 *  returns NULL after raising
 *    xcIteratorUninitialized - the object has not been positioned yet
 *    xcCursorExhausted       - the current group index is past the last group
 *  or when the statistics object cannot be created or filled; in the
 *  latter case the partially filled object is released
 */
static struct NGS_Statistics* SRA_ReadGroupGetStatistics ( const SRA_ReadGroup * self, ctx_t ctx )
{
    FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcAccessing );

    if ( ! self -> seen_first )
    {
        USER_ERROR ( xcIteratorUninitialized, "ReadGroup accessed before a call to ReadIteratorNext()" );
        return NULL;
    }

    if ( self -> cur_group >= self -> group_info -> count )
    {
        USER_ERROR ( xcCursorExhausted, "No more rows available" );
        return NULL;
    }

    {
        const struct SRA_ReadGroupStats * src = & self -> group_info -> groups [ self -> cur_group ];

        TRY ( NGS_Statistics * stats = SRA_StatisticsMake ( ctx ) )
        {
            /* each value is added only if all previous additions succeeded */
            TRY ( NGS_StatisticsAddU64 ( stats, ctx, "BASE_COUNT", src -> base_count ) )
            {
                TRY ( NGS_StatisticsAddU64 ( stats, ctx, "BIO_BASE_COUNT", src -> bio_base_count ) )
                {
                    TRY ( NGS_StatisticsAddU64 ( stats, ctx, "SPOT_COUNT", src -> row_count ) )
                    {
                        TRY ( NGS_StatisticsAddU64 ( stats, ctx, "SPOT_MAX", src -> max_row ) )
                        {
                            TRY ( NGS_StatisticsAddU64 ( stats, ctx, "SPOT_MIN", src -> min_row ) )
                            {
                                return stats;
                            }
                        }
                    }
                }
            }

            /* one of the additions failed */
            NGS_StatisticsRelease ( stats, ctx );
        }
    }

    return NULL;
}
294 
/* Make
 *  create a read group object positioned on the group called "group_name"
 *
 *  "curs" [ IN ] - cursor to duplicate into the new object; must not be NULL
 *  "group_info" [ IN ] - group table, duplicated by SRA_ReadGroupInit
 *  "run_name" [ IN ] - run name, duplicated by SRA_ReadGroupInit; not NULL
 *  "group_name", "group_name_size" [ IN ] - name looked up with
 *   SRA_ReadGroupInfoFind to select the current group
 *
 *  returns the new object, or NULL with the error recorded in "ctx"
 */
struct NGS_ReadGroup * SRA_ReadGroupMake ( ctx_t ctx,
                                           const struct NGS_Cursor * curs,
                                           const struct SRA_ReadGroupInfo* group_info,
                                           const struct NGS_String * run_name,
                                           const char * group_name, size_t group_name_size )
{
    FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcConstructing );
    SRA_ReadGroup * obj;

    assert ( curs != NULL );
    assert ( run_name != NULL );

    /* zero-filled, so all references start out NULL and both flags false */
    obj = calloc ( 1, sizeof * obj );
    if ( obj == NULL )
    {
        SYSTEM_ERROR ( xcNoMemory, "allocating NGS_ReadGroup on '%.*s'", NGS_StringSize ( run_name, ctx ), NGS_StringData ( run_name, ctx ) );
        return NULL;
    }

    {
#if _DEBUGGING
        /* debug builds label the instance with the run name */
        char instname [ 256 ];
        string_printf ( instname, sizeof instname, NULL, "%.*s", NGS_StringSize ( run_name, ctx ), NGS_StringData ( run_name, ctx ) );
        instname [ sizeof instname - 1 ] = 0;
#else
        const char *instname = "";
#endif
        TRY ( SRA_ReadGroupInit ( ctx, obj, "NGS_ReadGroup", instname, run_name, group_name, group_name_size, group_info ) )
        {
            TRY ( obj -> curs = NGS_CursorDuplicate ( curs, ctx ) )
            {
                TRY ( obj -> cur_group = SRA_ReadGroupInfoFind ( obj -> group_info, ctx, group_name, group_name_size ) )
                {
                    /* already positioned - accessors may be used right away */
                    obj -> seen_first = true;
                    return & obj -> dad;
                }
            }

            /* failed after Init succeeded - drop the references it took */
            SRA_ReadGroupWhack ( obj, ctx );
        }
    }

    free ( obj );
    return NULL;
}
337 
338 
339 /*--------------------------------------------------------------------------
340  * NGS_ReadGroupIterator
341  */
342 
343 /* Make
344  */
SRA_ReadGroupIteratorMake(ctx_t ctx,const NGS_Cursor * curs,const struct SRA_ReadGroupInfo * group_info,const struct NGS_String * run_name)345 NGS_ReadGroup * SRA_ReadGroupIteratorMake ( ctx_t ctx,
346                                                   const NGS_Cursor * curs,
347                                                   const struct SRA_ReadGroupInfo* group_info,
348                                                   const struct NGS_String * run_name )
349 {
350     FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcConstructing );
351 
352     SRA_ReadGroup * ref;
353 
354     assert ( curs != NULL );
355     assert ( run_name != NULL );
356 
357     ref = calloc ( 1, sizeof * ref );
358     if ( ref == NULL )
359         SYSTEM_ERROR ( xcNoMemory, "allocating NGS_ReadGroupIterator on '%.*s'", NGS_StringSize ( run_name, ctx ), NGS_StringData ( run_name, ctx ) );
360     else
361     {
362 #if _DEBUGGING
363         char instname [ 256 ];
364         string_printf ( instname, sizeof instname, NULL, "%.*s", NGS_StringSize ( run_name, ctx ), NGS_StringData ( run_name, ctx ) );
365         instname [ sizeof instname - 1 ] = 0;
366 #else
367         const char *instname = "";
368 #endif
369         TRY ( SRA_ReadGroupInit ( ctx, ref, "NGS_ReadGroupIterator", instname, run_name, "", 0, group_info ) )
370         {
371             TRY ( ref -> curs = NGS_CursorDuplicate ( curs, ctx ) )
372             {
373                 ref -> iterating = true;
374                 return & ref -> dad;
375             }
376             SRA_ReadGroupWhack ( ref, ctx );
377         }
378 
379         free ( ref );
380     }
381 
382     return NULL;
383 }
384 
385 /* Next
386  */
SRA_ReadGroupIteratorNext(SRA_ReadGroup * self,ctx_t ctx)387 bool SRA_ReadGroupIteratorNext ( SRA_ReadGroup * self, ctx_t ctx )
388 {
389     FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcAccessing );
390 
391     assert ( self != NULL );
392 
393     if ( ! self -> iterating )
394     {
395         USER_ERROR ( xcCursorExhausted, "No more rows available" );
396         return false;
397     }
398 
399     if ( self -> seen_first )
400     {   /* move to next group */
401         ++ self -> cur_group;
402     }
403     else
404     {
405         self -> seen_first = true;
406     }
407 
408     while ( self -> cur_group < self -> group_info -> count )
409     {
410         if ( self -> group_info -> groups [ self -> cur_group ] . min_row == 0 )
411         {
412            ++ self -> cur_group;
413         }
414         else
415         {
416             NGS_StringRelease ( self -> name, ctx );
417             self -> name = NULL;
418             TRY ( self -> name = NGS_StringDuplicate ( self -> group_info -> groups [ self -> cur_group ] . name, ctx ) )
419             {
420                 return true;
421             }
422             /* error - make the iterator unusable */
423             self -> cur_group = self -> group_info -> count;
424             return false;
425         }
426     }
427 
428     return false;
429 }
430