1 /*===========================================================================
2 *
3 * PUBLIC DOMAIN NOTICE
4 * National Center for Biotechnology Information
5 *
6 * This software/database is a "United States Government Work" under the
7 * terms of the United States Copyright Act. It was written as part of
8 * the author's official duties as a United States Government employee and
9 * thus cannot be copyrighted. This software/database is freely available
10 * to the public for use. The National Library of Medicine and the U.S.
11 * Government have not placed any restriction on its use or reproduction.
12 *
13 * Although all reasonable efforts have been taken to ensure the accuracy
14 * and reliability of the software and data, the NLM and the U.S.
15 * Government do not and cannot warrant the performance or results that
16 * may be obtained by using this software or data. The NLM and the U.S.
17 * Government disclaim all warranties, express or implied, including
18 * warranties of performance, merchantability or fitness for any particular
19 * purpose.
20 *
21 * Please cite the author in any work or product based on this material.
22 *
23 * ===========================================================================
24 *
25 */
26
27 #include <kapp/main.h>
28 #include <kapp/args.h>
29
30 #include <klib/out.h>
31 #include <klib/log.h>
32 #include <klib/rc.h>
33
34 #include <kfs/directory.h>
35 #include <kfs/filetools.h>
36
37 #include <insdc/insdc.h>
38
39 #include "common.h"
40 #include "slice.h"
41 #include "ref_iter.h"
42 #include "coverage_iter.h"
43
44 const char UsageDefaultName[] = "ref-idx";
45
UsageSummary(const char * progname)46 rc_t CC UsageSummary ( const char * progname )
47 {
48 return KOutMsg( "\n"
49 "Usage:\n"
50 " %s <path> [<path> ...] [options]\n"
51 "\n", progname );
52 }
53
54 #define CSRA_CACHE_SIZE 1024 * 1024 * 32
55
56 #define OPTION_CACHE "cache"
57 static const char * cache_usage[] = { "size of cursor-cache ( dflt=32 MB ) >", NULL };
58
59 #define OPTION_SLICE "slice"
60 #define ALIAS_SLICE "s"
61 static const char * slice_usage[] = { "restrict to this slice", NULL };
62
63 #define OPTION_MIN "min"
64 #define ALIAS_MIN "i"
65 static const char * min_usage[] = { "min coverage to look for ( dflt = 0 )", NULL };
66
67 #define OPTION_MAX "max"
68 #define ALIAS_MAX "x"
69 static const char * max_usage[] = { "max coverage to look for ( dflt = 0xFFFFFFFF )", NULL };
70
71 #define OPTION_FUNC "function"
72 #define ALIAS_FUNC "f"
73 static const char * func_usage[] = { "function to perform: 0...collect min/max for whole run",
74 "1...collect min/max for each reference",
75 "2...report reference-rows based on min/max-constrains",
76 NULL };
77
78 OptDef ToolOptions[] =
79 {
80 /* name alias fkt usage-txt, cnt, needs value, required */
81 { OPTION_CACHE, NULL, NULL, cache_usage, 1, true, false },
82 { OPTION_SLICE, ALIAS_SLICE, NULL, slice_usage, 1, true, false },
83 { OPTION_FUNC, ALIAS_FUNC, NULL, func_usage, 1, true, false },
84 { OPTION_MIN, ALIAS_MIN, NULL, min_usage, 1, true, false },
85 { OPTION_MAX, ALIAS_MAX, NULL, max_usage, 1, true, false }
86 };
87
Usage(const Args * args)88 rc_t CC Usage ( const Args * args )
89 {
90 const char * progname = UsageDefaultName;
91 const char * fullpath = UsageDefaultName;
92 uint32_t i;
93 rc_t rc;
94
95 if ( args == NULL )
96 rc = RC ( rcApp, rcArgv, rcAccessing, rcSelf, rcNull );
97 else
98 rc = ArgsProgram ( args, &fullpath, &progname );
99
100 if ( rc )
101 progname = fullpath = UsageDefaultName;
102
103 UsageSummary ( progname );
104
105 KOutMsg( "Options:\n" );
106
107 HelpOptionsStandard ();
108 for ( i = 0; i < sizeof ( ToolOptions ) / sizeof ( ToolOptions[ 0 ] ); ++i )
109 HelpOptionLine ( ToolOptions[ i ].aliases, ToolOptions[ i ].name, NULL, ToolOptions[ i ].help );
110
111 return rc;
112 }
113
114
115 typedef struct tool_ctx
116 {
117 size_t cursor_cache_size;
118 uint32_t min_coverage, max_coverage, function;
119 slice * slice;
120 } tool_ctx;
121
122
fill_out_tool_ctx(const Args * args,tool_ctx * ctx)123 static rc_t fill_out_tool_ctx( const Args * args, tool_ctx * ctx )
124 {
125 rc_t rc = get_size_t( args, OPTION_CACHE, &ctx->cursor_cache_size, CSRA_CACHE_SIZE );
126 if ( rc == 0 )
127 rc = get_uint32( args, OPTION_FUNC, &ctx->function, 0 );
128 if ( rc == 0 )
129 rc = get_uint32( args, OPTION_MIN, &ctx->min_coverage, 0 );
130 if ( rc == 0 )
131 rc = get_uint32( args, OPTION_MAX, &ctx->max_coverage, 0xFFFFFFFF );
132 if ( rc == 0 )
133 rc = get_slice( args, OPTION_SLICE, &ctx->slice );
134 return rc;
135 }
136
release_tool_ctx(tool_ctx * ctx)137 static void release_tool_ctx( tool_ctx * ctx )
138 {
139 if ( ctx != NULL )
140 {
141 if ( ctx->slice != NULL ) release_slice( ctx->slice );
142 }
143 }
144
145
146 /* ----------------------------------------------------------------------------------------------- */
147
148 typedef struct limit
149 {
150 uint32_t value;
151 int64_t row;
152 uint64_t pos;
153 String * ref;
154 } limit;
155
156
limit_set(limit * l,uint32_t value,int64_t row,uint64_t pos,String * ref)157 static void limit_set( limit * l, uint32_t value, int64_t row, uint64_t pos, String * ref )
158 {
159 l->value = value;
160 l->row = row;
161 l->pos = pos;
162 l->ref = ref;
163 }
164
f0_min_max_for_whole_run(tool_ctx * ctx,const char * src,const Vector * vec)165 static rc_t f0_min_max_for_whole_run( tool_ctx * ctx, const char * src, const Vector *vec )
166 {
167 rc_t rc = 0;
168 uint32_t idx;
169 limit min, max;
170 limit_set( &min, 0xFFFFFFFF, 0, 0, NULL );
171 limit_set( &max, 0, 0, 0, NULL );
172 for ( idx = VectorStart( vec ); rc == 0 && idx < VectorLength( vec ); ++idx )
173 {
174 RefT * ref = VectorGet( vec, idx );
175 if ( ref != NULL )
176 {
177 struct simple_coverage_iter * ci;
178 rc = simple_coverage_iter_make( &ci, src, ctx->cursor_cache_size, ref );
179 if ( rc == 0 )
180 {
181 SimpleCoverageT cv;
182 while ( rc == 0 &&
183 simple_coverage_iter_get_capped( ci, &cv, ctx->min_coverage, ctx->max_coverage ) )
184 {
185 if ( cv.prim > max.value )
186 limit_set( &max, cv.prim, cv.ref_row_id, cv.start_pos, &ref->rname );
187 if ( cv.prim < min.value )
188 limit_set( &min, cv.prim, cv.ref_row_id, cv.start_pos, &ref->rname );
189 }
190 simple_coverage_iter_release( ci );
191 }
192 }
193 }
194 if ( rc == 0 )
195 rc = KOutMsg( "MAX\t%S:%u\tref-row = %ld\talignments = %,u\n",
196 max.ref, max.pos, max.row, max.value );
197 if ( rc == 0 )
198 rc = KOutMsg( "MIN\t%S:%u\tref-row = %ld\talignments = %,u\n",
199 min.ref, min.pos, min.row, min.value );
200 return rc;
201 }
202
f1_min_max_for_each_ref(tool_ctx * ctx,const char * src,const Vector * vec)203 static rc_t f1_min_max_for_each_ref( tool_ctx * ctx, const char * src, const Vector *vec )
204 {
205 rc_t rc = 0;
206 uint32_t idx;
207 limit min, max;
208 for ( idx = VectorStart( vec ); rc == 0 && idx < VectorLength( vec ); ++idx )
209 {
210 RefT * ref = VectorGet( vec, idx );
211 if ( ref != NULL )
212 {
213 struct simple_coverage_iter * ci;
214 rc = simple_coverage_iter_make( &ci, src, ctx->cursor_cache_size, ref );
215 if ( rc == 0 )
216 {
217 SimpleCoverageT cv;
218
219 limit_set( &min, 0xFFFFFFFF, 0, 0, NULL );
220 limit_set( &max, 0, 0, 0, NULL );
221
222 while ( rc == 0 &&
223 simple_coverage_iter_get_capped( ci, &cv, ctx->min_coverage, ctx->max_coverage ) )
224 {
225 if ( cv.prim > max.value )
226 limit_set( &max, cv.prim, cv.ref_row_id, cv.start_pos, &ref->rname );
227 if ( cv.prim < min.value )
228 limit_set( &min, cv.prim, cv.ref_row_id, cv.start_pos, &ref->rname );
229 }
230 simple_coverage_iter_release( ci );
231 if ( rc == 0 )
232 rc = KOutMsg( "%S\n", &ref->rname );
233 if ( rc == 0 )
234 rc = KOutMsg( "\tMAX\tpos = %u\tref-row = %ld\talignments = %,u\n",
235 max.pos, max.row, max.value );
236 if ( rc == 0 )
237 rc = KOutMsg( "\tMIN\tpos = %u\tref-row = %ld\talignments = %,u\n\n",
238 min.pos, min.row, min.value );
239 }
240 }
241 }
242 return rc;
243 }
244
f2_refrows_between_min_max(tool_ctx * ctx,const char * src,const Vector * vec)245 static rc_t f2_refrows_between_min_max( tool_ctx * ctx, const char * src, const Vector *vec )
246 {
247 rc_t rc = 0;
248 uint32_t idx;
249 for ( idx = VectorStart( vec ); rc == 0 && idx < VectorLength( vec ); ++idx )
250 {
251 RefT * ref = VectorGet( vec, idx );
252 if ( ref != NULL )
253 {
254 bool perform = true;
255 if ( ctx->slice != NULL )
256 {
257 perform = ( 0 == StringCompare( ctx->slice->refname, &ref->rname ) );
258 if ( perform && ctx->slice->count > 0 )
259 {
260 int64_t start_offset = ctx->slice->start / ref->block_size;
261 int64_t end_offset = ctx->slice->end / ref->block_size;
262 ref->start_row_id += start_offset;
263 ref->count = ( end_offset - start_offset ) + 1;
264 }
265 }
266 if ( perform )
267 {
268 struct simple_coverage_iter * ci;
269 rc = simple_coverage_iter_make( &ci, src, ctx->cursor_cache_size, ref );
270 if ( rc == 0 )
271 {
272 SimpleCoverageT cv;
273 while ( rc == 0 &&
274 simple_coverage_iter_get_capped( ci, &cv, ctx->min_coverage, ctx->max_coverage ) )
275 {
276 rc = KOutMsg( "%u\t%S:%lu.%u\t%ld\n",
277 cv.prim, &ref->rname, cv.start_pos, cv.len, cv.ref_row_id );
278 }
279 simple_coverage_iter_release( ci );
280 }
281 }
282 }
283 }
284 return rc;
285 }
286
287
detailed_coverage(tool_ctx * ctx,const char * src)288 static rc_t detailed_coverage( tool_ctx * ctx, const char * src )
289 {
290 DetailedCoverage dc;
291 rc_t rc = detailed_coverage_make( src, ctx->cursor_cache_size, ctx->slice, &dc );
292 if ( rc == 0 )
293 {
294 uint32_t idx;
295 for ( idx = 0; rc == 0 && idx < dc.len; ++idx )
296 {
297 rc = KOutMsg( "%d\t%d\n", dc.start_pos + idx, dc.coverage[ idx ] );
298 }
299 detailed_coverage_release( &dc );
300 }
301 return rc;
302 }
303
list_test(tool_ctx * ctx,const char * src)304 static rc_t list_test( tool_ctx * ctx, const char * src )
305 {
306 KDirectory * dir;
307 rc_t rc = KDirectoryNativeDir( &dir );
308 KOutMsg( "list-test\n" );
309 if ( rc == 0 )
310 {
311 VNamelist * list;
312 rc = ReadDirEntriesIntoToNamelist( &list, dir, true, true, true, src );
313 if ( rc == 0 )
314 {
315 uint32_t idx, count;
316 rc = VNameListCount( list, &count );
317 for ( idx = 0; rc == 0 && idx < count; ++idx )
318 {
319 const char * name = NULL;
320 rc = VNameListGet( list, idx, &name );
321 if ( rc == 0 && name != NULL )
322 rc = KOutMsg( "--->%s\n", name );
323 }
324 VNamelistRelease( list );
325 }
326 KDirectoryRelease( dir );
327 }
328 return rc;
329 }
330
common_part(tool_ctx * ctx,const char * src)331 static rc_t common_part( tool_ctx * ctx, const char * src )
332 {
333 rc_t rc = 0;
334 if ( ctx->function < 3 )
335 {
336 Vector vec;
337 rc = ref_iter_make_vector( &vec, src, ctx->cursor_cache_size );
338 if ( rc == 0 )
339 {
340 switch( ctx->function )
341 {
342 case 0 : rc = f0_min_max_for_whole_run( ctx, src, &vec ); break;
343 case 1 : rc = f1_min_max_for_each_ref( ctx, src, &vec ); break;
344 case 2 : rc = f2_refrows_between_min_max( ctx, src, &vec ); break;
345 }
346 ref_iter_release_vector( &vec );
347 }
348 }
349 else
350 {
351 switch( ctx->function )
352 {
353 case 3 : rc = detailed_coverage( ctx, src ); break;
354 case 4 : rc = list_test( ctx, src ); break;
355 }
356 }
357 return rc;
358 }
359
360 /* ----------------------------------------------------------------------------------------------- */
361
362
KMain(int argc,char * argv[])363 rc_t CC KMain ( int argc, char *argv [] )
364 {
365 Args * args;
366 rc_t rc = ArgsMakeAndHandle ( &args, argc, argv, 1,
367 ToolOptions, sizeof ( ToolOptions ) / sizeof ( OptDef ) );
368 if ( rc == 0 )
369 {
370 tool_ctx ctx;
371 rc = fill_out_tool_ctx( args, &ctx );
372 if ( rc == 0 )
373 {
374 if ( rc == 0 )
375 {
376 uint32_t count, idx;
377 rc = ArgsParamCount( args, &count );
378 if ( rc == 0 )
379 {
380 if ( count < 1 )
381 {
382 rc = RC( rcApp, rcNoTarg, rcConstructing, rcParam, rcInvalid );
383 log_err( "no accession given!" );
384 }
385 else
386 {
387 const char * src;
388 for ( idx = 0; rc == 0 && idx < count; ++idx )
389 {
390 rc = ArgsParamValue( args, idx, ( const void ** )&src );
391 if ( rc == 0 )
392 {
393 rc = common_part( &ctx, src ); /* <==== */
394 }
395 }
396 }
397 }
398 }
399 release_tool_ctx( &ctx );
400 }
401 clear_recorded_errors();
402 ArgsWhack ( args );
403 }
404 else
405 log_err( "ArgsMakeAndHandle() failed %R", rc );
406 return rc;
407 }
408