1 /*===========================================================================
2 *
3 *                            PUBLIC DOMAIN NOTICE
4 *               National Center for Biotechnology Information
5 *
6 *  This software/database is a "United States Government Work" under the
7 *  terms of the United States Copyright Act.  It was written as part of
8 *  the author's official duties as a United States Government employee and
9 *  thus cannot be copyrighted.  This software/database is freely available
10 *  to the public for use. The National Library of Medicine and the U.S.
11 *  Government have not placed any restriction on its use or reproduction.
12 *
13 *  Although all reasonable efforts have been taken to ensure the accuracy
14 *  and reliability of the software and data, the NLM and the U.S.
15 *  Government do not and cannot warrant the performance or results that
16 *  may be obtained by using this software or data. The NLM and the U.S.
17 *  Government disclaim all warranties, express or implied, including
18 *  warranties of performance, merchantability or fitness for any particular
19 *  purpose.
20 *
21 *  Please cite the author in any work or product based on this material.
22 *
23 * ===========================================================================
24 *
25 */
26 
27 #include <kapp/main.h>
28 #include <kapp/args.h>
29 
30 #include <klib/out.h>
31 #include <klib/log.h>
32 #include <klib/rc.h>
33 
34 #include <kfs/directory.h>
35 #include <kfs/filetools.h>
36 
37 #include <insdc/insdc.h>
38 
39 #include "common.h"
40 #include "slice.h"
41 #include "ref_iter.h"
42 #include "coverage_iter.h"
43 
44 const char UsageDefaultName[] = "ref-idx";
45 
UsageSummary(const char * progname)46 rc_t CC UsageSummary ( const char * progname )
47 {
48     return KOutMsg( "\n"
49                      "Usage:\n"
50                      "  %s <path> [<path> ...] [options]\n"
51                      "\n", progname );
52 }
53 
54 #define CSRA_CACHE_SIZE 1024 * 1024 * 32
55 
56 #define OPTION_CACHE   "cache"
57 static const char * cache_usage[]   = { "size of cursor-cache ( dflt=32 MB ) >", NULL };
58 
59 #define OPTION_SLICE   "slice"
60 #define ALIAS_SLICE    "s"
61 static const char * slice_usage[]   = { "restrict to this slice", NULL };
62 
63 #define OPTION_MIN     "min"
64 #define ALIAS_MIN      "i"
65 static const char * min_usage[]     = { "min coverage to look for ( dflt = 0 )", NULL };
66 
67 #define OPTION_MAX     "max"
68 #define ALIAS_MAX      "x"
69 static const char * max_usage[]     = { "max coverage to look for ( dflt = 0xFFFFFFFF )", NULL };
70 
71 #define OPTION_FUNC    "function"
72 #define ALIAS_FUNC     "f"
73 static const char * func_usage[]    = { "function to perform: 0...collect min/max for whole run",
74                                          "1...collect min/max for each reference",
75                                          "2...report reference-rows based on min/max-constrains",
76                                          NULL };
77 
78 OptDef ToolOptions[] =
79 {
80 /*    name              alias           fkt    usage-txt,       cnt, needs value, required */
81     { OPTION_CACHE,     NULL,          NULL, cache_usage,     1,   true,        false },
82     { OPTION_SLICE,     ALIAS_SLICE,   NULL, slice_usage,     1,   true,        false },
83     { OPTION_FUNC,      ALIAS_FUNC,    NULL, func_usage,      1,   true,        false },
84     { OPTION_MIN,       ALIAS_MIN,     NULL, min_usage,       1,   true,        false },
85     { OPTION_MAX,       ALIAS_MAX,     NULL, max_usage,       1,   true,        false }
86 };
87 
Usage(const Args * args)88 rc_t CC Usage ( const Args * args )
89 {
90     const char * progname = UsageDefaultName;
91     const char * fullpath = UsageDefaultName;
92     uint32_t i;
93     rc_t rc;
94 
95     if ( args == NULL )
96         rc = RC ( rcApp, rcArgv, rcAccessing, rcSelf, rcNull );
97     else
98         rc = ArgsProgram ( args, &fullpath, &progname );
99 
100     if ( rc )
101         progname = fullpath = UsageDefaultName;
102 
103     UsageSummary ( progname );
104 
105     KOutMsg( "Options:\n" );
106 
107     HelpOptionsStandard ();
108     for ( i = 0; i < sizeof ( ToolOptions ) / sizeof ( ToolOptions[ 0 ] ); ++i )
109         HelpOptionLine ( ToolOptions[ i ].aliases, ToolOptions[ i ].name, NULL, ToolOptions[ i ].help );
110 
111     return rc;
112 }
113 
114 
115 typedef struct tool_ctx
116 {
117     size_t cursor_cache_size;
118     uint32_t min_coverage, max_coverage, function;
119     slice * slice;
120 } tool_ctx;
121 
122 
fill_out_tool_ctx(const Args * args,tool_ctx * ctx)123 static rc_t fill_out_tool_ctx( const Args * args, tool_ctx * ctx )
124 {
125     rc_t rc = get_size_t( args, OPTION_CACHE, &ctx->cursor_cache_size, CSRA_CACHE_SIZE );
126     if ( rc == 0 )
127         rc = get_uint32( args, OPTION_FUNC, &ctx->function, 0 );
128     if ( rc == 0 )
129         rc = get_uint32( args, OPTION_MIN, &ctx->min_coverage, 0 );
130     if ( rc == 0 )
131         rc = get_uint32( args, OPTION_MAX, &ctx->max_coverage, 0xFFFFFFFF );
132     if ( rc == 0 )
133         rc = get_slice( args, OPTION_SLICE, &ctx->slice );
134     return rc;
135 }
136 
release_tool_ctx(tool_ctx * ctx)137 static void release_tool_ctx( tool_ctx * ctx )
138 {
139     if ( ctx != NULL )
140     {
141         if ( ctx->slice != NULL ) release_slice( ctx->slice );
142     }
143 }
144 
145 
146 /* ----------------------------------------------------------------------------------------------- */
147 
148 typedef struct limit
149 {
150     uint32_t value;
151     int64_t row;
152     uint64_t pos;
153     String * ref;
154 } limit;
155 
156 
limit_set(limit * l,uint32_t value,int64_t row,uint64_t pos,String * ref)157 static void limit_set( limit * l, uint32_t value, int64_t row, uint64_t pos, String * ref )
158 {
159     l->value = value;
160     l->row = row;
161     l->pos = pos;
162     l->ref = ref;
163 }
164 
f0_min_max_for_whole_run(tool_ctx * ctx,const char * src,const Vector * vec)165 static rc_t f0_min_max_for_whole_run( tool_ctx * ctx, const char * src, const Vector *vec )
166 {
167     rc_t rc = 0;
168     uint32_t idx;
169     limit min, max;
170     limit_set( &min, 0xFFFFFFFF, 0, 0, NULL );
171     limit_set( &max, 0, 0, 0, NULL );
172     for ( idx = VectorStart( vec ); rc == 0 && idx < VectorLength( vec ); ++idx )
173     {
174         RefT * ref = VectorGet( vec, idx );
175         if ( ref != NULL )
176         {
177             struct simple_coverage_iter * ci;
178             rc = simple_coverage_iter_make( &ci, src, ctx->cursor_cache_size, ref );
179             if ( rc == 0 )
180             {
181                 SimpleCoverageT cv;
182                 while ( rc == 0 &&
183                         simple_coverage_iter_get_capped( ci, &cv, ctx->min_coverage, ctx->max_coverage ) )
184                 {
185                     if ( cv.prim > max.value )
186                         limit_set( &max, cv.prim, cv.ref_row_id, cv.start_pos, &ref->rname );
187                     if ( cv.prim < min.value )
188                         limit_set( &min, cv.prim, cv.ref_row_id, cv.start_pos, &ref->rname );
189                 }
190                 simple_coverage_iter_release( ci );
191             }
192         }
193     }
194     if ( rc == 0 )
195         rc = KOutMsg( "MAX\t%S:%u\tref-row = %ld\talignments = %,u\n",
196             max.ref, max.pos, max.row, max.value );
197     if ( rc == 0 )
198         rc = KOutMsg( "MIN\t%S:%u\tref-row = %ld\talignments = %,u\n",
199             min.ref, min.pos, min.row, min.value );
200     return rc;
201 }
202 
f1_min_max_for_each_ref(tool_ctx * ctx,const char * src,const Vector * vec)203 static rc_t f1_min_max_for_each_ref( tool_ctx * ctx, const char * src, const Vector *vec )
204 {
205     rc_t rc = 0;
206     uint32_t idx;
207     limit min, max;
208     for ( idx = VectorStart( vec ); rc == 0 && idx < VectorLength( vec ); ++idx )
209     {
210         RefT * ref = VectorGet( vec, idx );
211         if ( ref != NULL )
212         {
213             struct simple_coverage_iter * ci;
214             rc = simple_coverage_iter_make( &ci, src, ctx->cursor_cache_size, ref );
215             if ( rc == 0 )
216             {
217                 SimpleCoverageT cv;
218 
219                 limit_set( &min, 0xFFFFFFFF, 0, 0, NULL );
220                 limit_set( &max, 0, 0, 0, NULL );
221 
222                 while ( rc == 0 &&
223                         simple_coverage_iter_get_capped( ci, &cv, ctx->min_coverage, ctx->max_coverage ) )
224                 {
225                     if ( cv.prim > max.value )
226                         limit_set( &max, cv.prim, cv.ref_row_id, cv.start_pos, &ref->rname );
227                     if ( cv.prim < min.value )
228                         limit_set( &min, cv.prim, cv.ref_row_id, cv.start_pos, &ref->rname );
229                 }
230                 simple_coverage_iter_release( ci );
231                 if ( rc == 0 )
232                     rc = KOutMsg( "%S\n", &ref->rname );
233                 if ( rc == 0 )
234                     rc = KOutMsg( "\tMAX\tpos = %u\tref-row = %ld\talignments = %,u\n",
235                         max.pos, max.row, max.value );
236                 if ( rc == 0 )
237                     rc = KOutMsg( "\tMIN\tpos = %u\tref-row = %ld\talignments = %,u\n\n",
238                         min.pos, min.row, min.value );
239             }
240         }
241     }
242     return rc;
243 }
244 
f2_refrows_between_min_max(tool_ctx * ctx,const char * src,const Vector * vec)245 static rc_t f2_refrows_between_min_max( tool_ctx * ctx, const char * src, const Vector *vec )
246 {
247     rc_t rc = 0;
248     uint32_t idx;
249     for ( idx = VectorStart( vec ); rc == 0 && idx < VectorLength( vec ); ++idx )
250     {
251         RefT * ref = VectorGet( vec, idx );
252         if ( ref != NULL )
253         {
254             bool perform = true;
255             if ( ctx->slice != NULL )
256             {
257                 perform = ( 0 == StringCompare( ctx->slice->refname, &ref->rname ) );
258                 if ( perform && ctx->slice->count > 0 )
259                 {
260                     int64_t start_offset = ctx->slice->start / ref->block_size;
261                     int64_t end_offset = ctx->slice->end / ref->block_size;
262                     ref->start_row_id += start_offset;
263                     ref->count = ( end_offset - start_offset ) + 1;
264                 }
265             }
266             if ( perform )
267             {
268                 struct simple_coverage_iter * ci;
269                 rc = simple_coverage_iter_make( &ci, src, ctx->cursor_cache_size, ref );
270                 if ( rc == 0 )
271                 {
272                     SimpleCoverageT cv;
273                     while ( rc == 0 &&
274                              simple_coverage_iter_get_capped( ci, &cv, ctx->min_coverage, ctx->max_coverage ) )
275                     {
276                         rc = KOutMsg( "%u\t%S:%lu.%u\t%ld\n",
277                             cv.prim, &ref->rname, cv.start_pos, cv.len, cv.ref_row_id );
278                     }
279                     simple_coverage_iter_release( ci );
280                 }
281             }
282         }
283     }
284     return rc;
285 }
286 
287 
detailed_coverage(tool_ctx * ctx,const char * src)288 static rc_t detailed_coverage( tool_ctx * ctx, const char * src )
289 {
290     DetailedCoverage dc;
291     rc_t rc = detailed_coverage_make( src, ctx->cursor_cache_size, ctx->slice, &dc );
292     if ( rc == 0 )
293     {
294         uint32_t idx;
295         for ( idx = 0; rc == 0 && idx < dc.len; ++idx )
296         {
297             rc = KOutMsg( "%d\t%d\n", dc.start_pos + idx, dc.coverage[ idx ] );
298         }
299         detailed_coverage_release( &dc );
300     }
301     return rc;
302 }
303 
list_test(tool_ctx * ctx,const char * src)304 static rc_t list_test( tool_ctx * ctx, const char * src )
305 {
306     KDirectory * dir;
307     rc_t rc = KDirectoryNativeDir( &dir );
308     KOutMsg( "list-test\n" );
309     if ( rc == 0 )
310     {
311         VNamelist * list;
312         rc = ReadDirEntriesIntoToNamelist( &list, dir, true, true, true, src );
313         if ( rc == 0 )
314         {
315             uint32_t idx, count;
316             rc = VNameListCount( list, &count );
317             for ( idx = 0; rc == 0 && idx < count; ++idx )
318             {
319                 const char * name = NULL;
320                 rc = VNameListGet( list, idx, &name );
321                 if ( rc == 0 && name != NULL )
322                     rc = KOutMsg( "--->%s\n", name );
323             }
324             VNamelistRelease( list );
325         }
326         KDirectoryRelease( dir );
327     }
328     return rc;
329 }
330 
common_part(tool_ctx * ctx,const char * src)331 static rc_t common_part( tool_ctx * ctx, const char * src )
332 {
333     rc_t rc = 0;
334     if ( ctx->function < 3 )
335     {
336         Vector vec;
337         rc = ref_iter_make_vector( &vec, src, ctx->cursor_cache_size );
338         if ( rc == 0 )
339         {
340             switch( ctx->function )
341             {
342                 case 0 : rc = f0_min_max_for_whole_run( ctx, src, &vec ); break;
343                 case 1 : rc = f1_min_max_for_each_ref( ctx, src, &vec ); break;
344                 case 2 : rc = f2_refrows_between_min_max( ctx, src, &vec ); break;
345             }
346             ref_iter_release_vector( &vec );
347         }
348     }
349     else
350     {
351         switch( ctx->function )
352         {
353             case 3 : rc = detailed_coverage( ctx, src ); break;
354             case 4 : rc = list_test( ctx, src ); break;
355         }
356     }
357     return rc;
358 }
359 
360 /* ----------------------------------------------------------------------------------------------- */
361 
362 
KMain(int argc,char * argv[])363 rc_t CC KMain ( int argc, char *argv [] )
364 {
365     Args * args;
366     rc_t rc = ArgsMakeAndHandle ( &args, argc, argv, 1,
367                 ToolOptions, sizeof ( ToolOptions ) / sizeof ( OptDef ) );
368     if ( rc == 0 )
369     {
370         tool_ctx ctx;
371         rc = fill_out_tool_ctx( args, &ctx );
372         if ( rc == 0 )
373         {
374             if ( rc == 0 )
375             {
376                 uint32_t count, idx;
377                 rc = ArgsParamCount( args, &count );
378                 if ( rc == 0 )
379                 {
380                     if ( count < 1 )
381                     {
382                         rc = RC( rcApp, rcNoTarg, rcConstructing, rcParam, rcInvalid );
383                         log_err( "no accession given!" );
384                     }
385                     else
386                     {
387                         const char * src;
388                         for ( idx = 0; rc == 0 && idx < count; ++idx )
389                         {
390                             rc = ArgsParamValue( args, idx, ( const void ** )&src );
391                             if ( rc == 0 )
392                             {
393                                 rc = common_part( &ctx, src ); /* <==== */
394                             }
395                         }
396                     }
397                 }
398             }
399             release_tool_ctx( &ctx );
400         }
401         clear_recorded_errors();
402         ArgsWhack ( args );
403     }
404     else
405         log_err( "ArgsMakeAndHandle() failed %R", rc );
406     return rc;
407 }
408