1 /*===========================================================================
2 *
3 * PUBLIC DOMAIN NOTICE
4 * National Center for Biotechnology Information
5 *
6 * This software/database is a "United States Government Work" under the
7 * terms of the United States Copyright Act. It was written as part of
8 * the author's official duties as a United States Government employee and
9 * thus cannot be copyrighted. This software/database is freely available
10 * to the public for use. The National Library of Medicine and the U.S.
11 * Government have not placed any restriction on its use or reproduction.
12 *
13 * Although all reasonable efforts have been taken to ensure the accuracy
14 * and reliability of the software and data, the NLM and the U.S.
15 * Government do not and cannot warrant the performance or results that
16 * may be obtained by using this software or data. The NLM and the U.S.
17 * Government disclaim all warranties, express or implied, including
18 * warranties of performance, merchantability or fitness for any particular
19 * purpose.
20 *
21 * Please cite the author in any work or product based on this material.
22 *
23 * ===========================================================================
24 *
25 */
26
27 #include "SRA_ReadGroup.h"
28
29 #include "SRA_Read.h"
30 #include "SRA_ReadGroupInfo.h"
31 #include "SRA_Statistics.h"
32
33 #include "NGS_String.h"
34 #include "NGS_Cursor.h"
35 #include "NGS_Id.h"
36
37 #include <kfc/ctx.h>
38 #include <kfc/rsrc.h>
39 #include <kfc/except.h>
40 #include <kfc/xc.h>
41 #include <klib/text.h>
42 #include <klib/printf.h>
43 #include <klib/refcount.h>
44 #include <vdb/cursor.h>
45 #include <vdb/schema.h>
46 #include <vdb/vdb-priv.h>
47 #include <insdc/insdc.h>
48
49 #include <strtol.h> /* strtoi64 */
50
51 #include <stddef.h>
52 #include <assert.h>
53
54 #include <sysalloc.h>
55
56 /*--------------------------------------------------------------------------
57 * SRA_ReadGroup
58 */
59
60 struct SRA_ReadGroup
61 {
62 NGS_ReadGroup dad;
63
64 const NGS_String * run_name;
65 const NGS_String * name; /* owns the char buffer */
66
67 const NGS_Cursor * curs;
68 const SRA_ReadGroupInfo* group_info;
69
70 bool seen_first;
71 bool iterating;
72
73 uint32_t cur_group;
74 };
75
76 static void SRA_ReadGroupWhack ( SRA_ReadGroup * self, ctx_t ctx );
77 static struct NGS_String* SRA_ReadGroupGetName ( const SRA_ReadGroup * self, ctx_t ctx );
78 static struct NGS_Read* SRA_ReadGroupGetReads ( const SRA_ReadGroup * self, ctx_t ctx, bool wants_full, bool wants_partial, bool wants_unaligned );
79 static struct NGS_Read* SRA_ReadGroupGetRead ( const SRA_ReadGroup * self, ctx_t ctx, const char* readId );
80 static struct NGS_Statistics* SRA_ReadGroupGetStatistics ( const SRA_ReadGroup * self, ctx_t ctx );
81 static bool SRA_ReadGroupIteratorNext ( SRA_ReadGroup * self, ctx_t ctx );
82
83 static NGS_ReadGroup_vt NGS_ReadGroup_vt_inst =
84 {
85 {
86 /* NGS_RefCount */
87 SRA_ReadGroupWhack
88 },
89
90 /* NGS_ReadGroup */
91 SRA_ReadGroupGetName,
92 SRA_ReadGroupGetReads,
93 SRA_ReadGroupGetRead,
94 SRA_ReadGroupGetStatistics,
95 SRA_ReadGroupIteratorNext,
96 };
97
98 /* Init
99 */
100 static
SRA_ReadGroupInit(ctx_t ctx,SRA_ReadGroup * self,const char * clsname,const char * instname,const NGS_String * run_name,const char * group_name,size_t group_name_size,const struct SRA_ReadGroupInfo * group_info)101 void SRA_ReadGroupInit ( ctx_t ctx,
102 SRA_ReadGroup * self,
103 const char *clsname,
104 const char *instname,
105 const NGS_String* run_name,
106 const char* group_name, size_t group_name_size,
107 const struct SRA_ReadGroupInfo* group_info )
108 {
109 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcConstructing );
110
111 if ( self == NULL )
112 INTERNAL_ERROR ( xcParamNull, "bad object reference" );
113 else
114 {
115 TRY ( NGS_ReadGroupInit ( ctx, & self -> dad, & NGS_ReadGroup_vt_inst, clsname, instname ) )
116 {
117 TRY ( self -> run_name = NGS_StringDuplicate ( run_name, ctx ) )
118 {
119 TRY ( self -> name = NGS_StringMakeCopy ( ctx, group_name, group_name_size ) )
120 {
121 self -> group_info = SRA_ReadGroupInfoDuplicate ( group_info, ctx );
122 }
123 }
124 }
125 }
126 }
127
128 /* Whack
129 */
130 static
SRA_ReadGroupWhack(SRA_ReadGroup * self,ctx_t ctx)131 void SRA_ReadGroupWhack ( SRA_ReadGroup * self, ctx_t ctx )
132 {
133 NGS_StringRelease ( self -> run_name, ctx );
134 NGS_StringRelease ( self -> name, ctx );
135 NGS_CursorRelease ( self -> curs, ctx );
136 SRA_ReadGroupInfoRelease ( self -> group_info, ctx );
137 }
138
/* GetName
 *  returns a new reference to the name of the current read group, or NULL
 *  with an error set on "ctx" when the object is not positioned on a group
 *  ( never advanced, or advanced past the last group ).
 */
struct NGS_String* SRA_ReadGroupGetName ( const SRA_ReadGroup * self, ctx_t ctx )
{
    FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcAccessing );

    assert ( self != NULL );

    /* not yet positioned on a group */
    if ( ! self -> seen_first )
    {
        USER_ERROR ( xcIteratorUninitialized, "ReadGroup accessed before a call to ReadIteratorNext()" );
        return NULL;
    }

    /* positioned past the end of the group table */
    if ( self -> cur_group >= self -> group_info -> count )
    {
        USER_ERROR ( xcCursorExhausted, "No more rows available" );
        return NULL;
    }

    return NGS_StringDuplicate ( self -> name, ctx );
}
158
/* GetReads
 *  creates a read iterator restricted to the row range recorded for the
 *  current read group, passing the three category filters through unchanged.
 *  Returns NULL with an error set on "ctx" when the object is not positioned
 *  on a group.
 */
struct NGS_Read* SRA_ReadGroupGetReads ( const SRA_ReadGroup * self, ctx_t ctx, bool wants_full, bool wants_partial, bool wants_unaligned )
{
    FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcAccessing );

    /* not yet positioned on a group */
    if ( ! self -> seen_first )
    {
        USER_ERROR ( xcIteratorUninitialized, "ReadGroup accessed before a call to ReadIteratorNext()" );
        return NULL;
    }

    /* positioned past the end of the group table */
    if ( self -> cur_group >= self -> group_info -> count )
    {
        USER_ERROR ( xcCursorExhausted, "No more rows available" );
        return NULL;
    }

    {
        /* NOTE(review): the row count is computed as max_row - min_row; if
           max_row is an inclusive bound this leaves out the last row - confirm
           against SRA_ReadGroupInfo and SRA_ReadIteratorMakeReadGroup */
        uint64_t first_row = self -> group_info -> groups [ self -> cur_group ] . min_row;
        uint64_t row_count = self -> group_info -> groups [ self -> cur_group ] . max_row - first_row;

        return SRA_ReadIteratorMakeReadGroup ( ctx,
                                               self -> curs,
                                               self -> run_name,
                                               self -> name,
                                               first_row,
                                               row_count,
                                               wants_full,
                                               wants_partial,
                                               wants_unaligned );
    }
}
188
/* GetRead
 *  looks up a single read by its id string and returns it only if it belongs
 *  to this run and to the current read group.
 *
 *  readIdStr - NUL-terminated read id, parsed with NGS_IdParse
 *
 *  returns the read on success; NULL with an error set on "ctx" when
 *    - the object is not positioned on a group,
 *    - the id cannot be parsed,
 *    - the run part of the id differs from this object's run name
 *      ( xcArcIncorrect ),
 *    - the read cannot be opened or its read group cannot be obtained,
 *    - the read belongs to another read group ( xcWrongReadGroup ).
 */
struct NGS_Read* SRA_ReadGroupGetRead ( const SRA_ReadGroup * self, ctx_t ctx, const char* readIdStr )
{
    FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcAccessing );

    if ( ! self -> seen_first )
    {
        USER_ERROR ( xcIteratorUninitialized, "ReadGroup accessed before a call to ReadIteratorNext()" );
        return NULL;
    }
    else if ( self -> cur_group >= self -> group_info -> count )
    {
        USER_ERROR ( xcCursorExhausted, "No more rows available" );
        return NULL;
    }

    {
        TRY ( struct NGS_Id id = NGS_IdParse ( readIdStr, string_size ( readIdStr ), ctx ) )
        {
            /* the id must name the run this object was created for */
            if ( string_cmp ( NGS_StringData ( self -> run_name, ctx ),
                              NGS_StringSize ( self -> run_name, ctx ),
                              id . run . addr,
                              id . run . size,
                              id . run . len ) != 0 )
            {
                INTERNAL_ERROR ( xcArcIncorrect,
                                 " expected '%.*s', actual '%.*s'",
                                 NGS_StringSize ( self -> run_name, ctx ),
                                 NGS_StringData ( self -> run_name, ctx ),
                                 id . run . size,
                                 id . run . addr );
            }
            else
            {
                /* make sure the requested read is from this read group */
                NGS_Read* ret;
                TRY ( ret = SRA_ReadMake ( ctx, self -> curs, id . rowId, self -> run_name ) )
                {
                    TRY ( const NGS_String* readGroup = NGS_ReadGetReadGroup ( ret, ctx ) )
                    {
                        /* NOTE(review): the comparison is limited to the size
                           of readGroup; confirm string_cmp still reports a
                           mismatch when self -> name is longer than readGroup */
                        if ( string_cmp ( NGS_StringData ( self -> name, ctx ),
                                          NGS_StringSize ( self -> name, ctx ),
                                          NGS_StringData ( readGroup, ctx ),
                                          NGS_StringSize ( readGroup, ctx ),
                                          NGS_StringSize ( readGroup, ctx ) ) == 0 )
                        {
                            NGS_StringRelease ( readGroup, ctx );
                            return ret;
                        }

                        /* both strings are passed as ( size, data ) pairs, so
                           both conversions must be '%.*s' */
                        INTERNAL_ERROR ( xcWrongReadGroup,
                                         "Requested read is from a different read group (expected '%.*s', actual '%.*s')",
                                         NGS_StringSize ( self -> name, ctx ),
                                         NGS_StringData ( self -> name, ctx ),
                                         NGS_StringSize ( readGroup, ctx ),
                                         NGS_StringData ( readGroup, ctx ) );
                        NGS_StringRelease ( readGroup, ctx );
                    }

                    /* wrong group, or the group name could not be read:
                       the read is not handed out */
                    NGS_ReadRelease ( ret, ctx );
                }
            }
        }
    }
    return NULL;
}
252
/* GetStatistics
 *  builds a statistics object for the current read group holding the
 *  entries BASE_COUNT, BIO_BASE_COUNT, SPOT_COUNT, SPOT_MAX and SPOT_MIN.
 *  Returns NULL with an error set on "ctx" when the object is not positioned
 *  on a group or when any entry cannot be added; a partially filled
 *  statistics object is released before returning.
 */
static struct NGS_Statistics* SRA_ReadGroupGetStatistics ( const SRA_ReadGroup * self, ctx_t ctx )
{
    FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcAccessing );

    /* not yet positioned on a group */
    if ( ! self -> seen_first )
    {
        USER_ERROR ( xcIteratorUninitialized, "ReadGroup accessed before a call to ReadIteratorNext()" );
        return NULL;
    }

    /* positioned past the end of the group table */
    if ( self -> cur_group >= self -> group_info -> count )
    {
        USER_ERROR ( xcCursorExhausted, "No more rows available" );
        return NULL;
    }

    {
        const struct SRA_ReadGroupStats * stats = & self -> group_info -> groups [ self -> cur_group ];
        TRY ( NGS_Statistics * result = SRA_StatisticsMake ( ctx ) )
        {
            /* each entry is added only if the previous one succeeded */
            TRY ( NGS_StatisticsAddU64 ( result, ctx, "BASE_COUNT", stats -> base_count ) )
            {
                TRY ( NGS_StatisticsAddU64 ( result, ctx, "BIO_BASE_COUNT", stats -> bio_base_count ) )
                {
                    TRY ( NGS_StatisticsAddU64 ( result, ctx, "SPOT_COUNT", stats -> row_count ) )
                    {
                        TRY ( NGS_StatisticsAddU64 ( result, ctx, "SPOT_MAX", stats -> max_row ) )
                        {
                            TRY ( NGS_StatisticsAddU64 ( result, ctx, "SPOT_MIN", stats -> min_row ) )
                            {
                                return result;
                            }
                        }
                    }
                }
            }

            /* one of the additions failed */
            NGS_StatisticsRelease ( result, ctx );
        }
    }

    return NULL;
}
294
/* Make
 *  creates a stand-alone ( non-iterating ) read group object for the group
 *  called "group_name" ( "group_name_size" bytes ).
 *
 *  The object takes its own references to "curs", "group_info" and
 *  "run_name". The group is located in "group_info" by name, and the object
 *  is immediately positioned on it.
 *
 *  returns the new object, or NULL with an error set on "ctx".
 */
struct NGS_ReadGroup * SRA_ReadGroupMake ( ctx_t ctx,
                                           const struct NGS_Cursor * curs,
                                           const struct SRA_ReadGroupInfo* group_info,
                                           const struct NGS_String * run_name,
                                           const char * group_name, size_t group_name_size )
{
    FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcConstructing );
    SRA_ReadGroup * obj;

    assert ( curs != NULL );
    assert ( run_name != NULL );

    /* zero-filled, so every member starts out NULL / false / 0 */
    obj = calloc ( 1, sizeof * obj );
    if ( obj != NULL )
    {
#if _DEBUGGING
        char instname [ 256 ];
        string_printf ( instname, sizeof instname, NULL, "%.*s", NGS_StringSize ( run_name, ctx ), NGS_StringData ( run_name, ctx ) );
        instname [ sizeof instname - 1 ] = 0;
#else
        const char *instname = "";
#endif
        TRY ( SRA_ReadGroupInit ( ctx, obj, "NGS_ReadGroup", instname, run_name, group_name, group_name_size, group_info ) )
        {
            TRY ( obj -> curs = NGS_CursorDuplicate ( curs, ctx ) )
            {
                TRY ( obj -> cur_group = SRA_ReadGroupInfoFind ( obj -> group_info, ctx, group_name, group_name_size ) )
                {
                    /* already positioned - no IteratorNext call is needed */
                    obj -> seen_first = true;
                    return & obj -> dad;
                }
            }

            /* Init succeeded but a later step failed: drop held references */
            SRA_ReadGroupWhack ( obj, ctx );
        }

        free ( obj );
    }
    else
    {
        SYSTEM_ERROR ( xcNoMemory, "allocating NGS_ReadGroup on '%.*s'", NGS_StringSize ( run_name, ctx ), NGS_StringData ( run_name, ctx ) );
    }

    return NULL;
}
337
338
339 /*--------------------------------------------------------------------------
340 * NGS_ReadGroupIterator
341 */
342
343 /* Make
344 */
SRA_ReadGroupIteratorMake(ctx_t ctx,const NGS_Cursor * curs,const struct SRA_ReadGroupInfo * group_info,const struct NGS_String * run_name)345 NGS_ReadGroup * SRA_ReadGroupIteratorMake ( ctx_t ctx,
346 const NGS_Cursor * curs,
347 const struct SRA_ReadGroupInfo* group_info,
348 const struct NGS_String * run_name )
349 {
350 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcConstructing );
351
352 SRA_ReadGroup * ref;
353
354 assert ( curs != NULL );
355 assert ( run_name != NULL );
356
357 ref = calloc ( 1, sizeof * ref );
358 if ( ref == NULL )
359 SYSTEM_ERROR ( xcNoMemory, "allocating NGS_ReadGroupIterator on '%.*s'", NGS_StringSize ( run_name, ctx ), NGS_StringData ( run_name, ctx ) );
360 else
361 {
362 #if _DEBUGGING
363 char instname [ 256 ];
364 string_printf ( instname, sizeof instname, NULL, "%.*s", NGS_StringSize ( run_name, ctx ), NGS_StringData ( run_name, ctx ) );
365 instname [ sizeof instname - 1 ] = 0;
366 #else
367 const char *instname = "";
368 #endif
369 TRY ( SRA_ReadGroupInit ( ctx, ref, "NGS_ReadGroupIterator", instname, run_name, "", 0, group_info ) )
370 {
371 TRY ( ref -> curs = NGS_CursorDuplicate ( curs, ctx ) )
372 {
373 ref -> iterating = true;
374 return & ref -> dad;
375 }
376 SRA_ReadGroupWhack ( ref, ctx );
377 }
378
379 free ( ref );
380 }
381
382 return NULL;
383 }
384
385 /* Next
386 */
SRA_ReadGroupIteratorNext(SRA_ReadGroup * self,ctx_t ctx)387 bool SRA_ReadGroupIteratorNext ( SRA_ReadGroup * self, ctx_t ctx )
388 {
389 FUNC_ENTRY ( ctx, rcSRA, rcCursor, rcAccessing );
390
391 assert ( self != NULL );
392
393 if ( ! self -> iterating )
394 {
395 USER_ERROR ( xcCursorExhausted, "No more rows available" );
396 return false;
397 }
398
399 if ( self -> seen_first )
400 { /* move to next group */
401 ++ self -> cur_group;
402 }
403 else
404 {
405 self -> seen_first = true;
406 }
407
408 while ( self -> cur_group < self -> group_info -> count )
409 {
410 if ( self -> group_info -> groups [ self -> cur_group ] . min_row == 0 )
411 {
412 ++ self -> cur_group;
413 }
414 else
415 {
416 NGS_StringRelease ( self -> name, ctx );
417 self -> name = NULL;
418 TRY ( self -> name = NGS_StringDuplicate ( self -> group_info -> groups [ self -> cur_group ] . name, ctx ) )
419 {
420 return true;
421 }
422 /* error - make the iterator unusable */
423 self -> cur_group = self -> group_info -> count;
424 return false;
425 }
426 }
427
428 return false;
429 }
430