1 /*===========================================================================
2 *
3 *                            PUBLIC DOMAIN NOTICE
4 *               National Center for Biotechnology Information
5 *
6 *  This software/database is a "United States Government Work" under the
7 *  terms of the United States Copyright Act.  It was written as part of
8 *  the author's official duties as a United States Government employee and
9 *  thus cannot be copyrighted.  This software/database is freely available
10 *  to the public for use. The National Library of Medicine and the U.S.
11 *  Government have not placed any restriction on its use or reproduction.
12 *
13 *  Although all reasonable efforts have been taken to ensure the accuracy
14 *  and reliability of the software and data, the NLM and the U.S.
15 *  Government do not and cannot warrant the performance or results that
16 *  may be obtained by using this software or data. The NLM and the U.S.
17 *  Government disclaim all warranties, express or implied, including
18 *  warranties of performance, merchantability or fitness for any particular
19 *  purpose.
20 *
21 *  Please cite the author in any work or product based on this material.
22 *
23 * ===========================================================================
24 *
25 */
26 
27 #include "cmn_iter.h"
28 #include "sorter.h"
29 #include "merge_sorter.h"
30 #include "join.h"
31 #include "tbl_join.h"
32 #include "concatenator.h"
33 #include "cleanup_task.h"
34 #include "lookup_reader.h"
35 #include "raw_read_iter.h"
36 #include "temp_dir.h"
37 
38 #include <kapp/main.h>
39 #include <kapp/args.h>
40 
41 #include <kfg/config.h> /* KConfigSetNgcFile */
42 
43 #include <klib/out.h>
44 #include <klib/printf.h>
45 #include <search/grep.h>
46 #include <kfs/directory.h>
47 #include <kproc/procmgr.h>
48 #include <vdb/manager.h>
49 
50 #include <stdio.h>
51 #include <os-native.h>
52 #include <sysalloc.h>
53 
54 /* ---------------------------------------------------------------------------------- */
55 
/* Command line options of the tool.
 *
 * Each option consists of
 *   - a NULL-terminated array with the lines of its help text,
 *   - OPTION_xxx : the long name of the option,
 *   - ALIAS_xxx  : the one-letter alias ( only for options that have one ).
 *
 * The defaults quoted in the help texts are kept in sync with the DFLT_xxx
 * values that get_user_input() hands to the option-getters
 * ( cursor-cache: 5MB, mem: 50MB, threads: 6, bufsize: 1MB ).
 */

static const char * format_usage[] = { "format (special, fastq, lookup, default=special)", NULL };
#define OPTION_FORMAT   "format"
#define ALIAS_FORMAT    "F"

static const char * outputf_usage[] = { "output-file", NULL };
#define OPTION_OUTPUT_F "outfile"
#define ALIAS_OUTPUT_F  "o"

static const char * outputd_usage[] = { "output-dir", NULL };
#define OPTION_OUTPUT_D "outdir"
#define ALIAS_OUTPUT_D  "O"

static const char * progress_usage[] = { "show progress", NULL };
#define OPTION_PROGRESS "progress"
#define ALIAS_PROGRESS  "p"

static const char * bufsize_usage[] = { "size of file-buffer dflt=1MB", NULL };
#define OPTION_BUFSIZE  "bufsize"
#define ALIAS_BUFSIZE   "b"

/* the default has to match DFLT_CUR_CACHE ( 5 * 1024 * 1024 ) */
static const char * curcache_usage[] = { "size of cursor-cache dflt=5MB", NULL };
#define OPTION_CURCACHE "curcache"
#define ALIAS_CURCACHE  "c"

/* the default has to match DFLT_MEM_LIMIT ( 1024L * 1024 * 50 ) */
static const char * mem_usage[] = { "memory limit for sorting dflt=50MB", NULL };
#define OPTION_MEM      "mem"
#define ALIAS_MEM       "m"

static const char * temp_usage[] = { "where to put temp. files dflt=curr dir", NULL };
#define OPTION_TEMP     "temp"
#define ALIAS_TEMP      "t"

static const char * threads_usage[] = { "how many thread dflt=6", NULL };
#define OPTION_THREADS  "threads"
#define ALIAS_THREADS   "e"

static const char * detail_usage[] = { "print details", NULL };
#define OPTION_DETAILS  "details"
#define ALIAS_DETAILS    "x"

static const char * split_spot_usage[] = { "split spots into reads", NULL };
#define OPTION_SPLIT_SPOT "split-spot"
#define ALIAS_SPLIT_SPOT  "s"

static const char * split_file_usage[] = { "write reads into different files", NULL };
#define OPTION_SPLIT_FILE "split-files"
#define ALIAS_SPLIT_FILE  "S"

static const char * split_3_usage[] = { "writes single reads in special file", NULL };
#define OPTION_SPLIT_3   "split-3"
#define ALIAS_SPLIT_3    "3"

static const char * whole_spot_usage[] = { "writes whole spots into one file", NULL };
#define OPTION_WHOLE_SPOT   "concatenate-reads"

static const char * stdout_usage[] = { "print output to stdout", NULL };
#define OPTION_STDOUT    "stdout"
#define ALIAS_STDOUT     "Z"

/* currently disabled options ( see the matching disabled entries in ToolOptions )
static const char * gzip_usage[] = { "compress output using gzip", NULL };
#define OPTION_GZIP      "gzip"
#define ALIAS_GZIP       "g"

static const char * bzip2_usage[] = { "compress output using bzip2", NULL };
#define OPTION_BZIP2     "bzip2"
#define ALIAS_BZIP2      "z"

static const char * maxfd_usage[] = { "maximal number of file-descriptors", NULL };
#define OPTION_MAXFD     "maxfd"
#define ALIAS_MAXFD      "a"
*/

static const char * force_usage[] = { "force to overwrite existing file(s)", NULL };
#define OPTION_FORCE     "force"
#define ALIAS_FORCE      "f"

static const char * ridn_usage[] = { "use row-id as name", NULL };
#define OPTION_RIDN      "rowid-as-name"
#define ALIAS_RIDN       "N"

static const char * skip_tech_usage[] = { "skip technical reads", NULL };
#define OPTION_SKIP_TECH      "skip-technical"

static const char * incl_tech_usage[] = { "include technical reads", NULL };
#define OPTION_INCL_TECH      "include-technical"

static const char * print_read_nr[] = { "print read-numbers", NULL };
#define OPTION_PRNR      "print-read-nr"
#define ALIAS_PRNR       "P"

static const char * min_rl_usage[] = { "filter by sequence-len", NULL };
#define OPTION_MINRDLEN  "min-read-len"
#define ALIAS_MINRDLEN   "M"

static const char * base_flt_usage[] = { "filter by bases", NULL };
#define OPTION_BASE_FLT  "bases"
#define ALIAS_BASE_FLT   "B"

static const char * table_usage[] = { "which seq-table to use in case of pacbio", NULL };
#define OPTION_TABLE    "table"

static const char * strict_usage[] = { "terminate on invalid read", NULL };
#define OPTION_STRICT   "strict"

static const char * append_usage[] = { "append to output-file", NULL };
#define OPTION_APPEND   "append"
#define ALIAS_APPEND    "A"

static const char * ngc_usage[] = { "PATH to ngc file", NULL };
#define OPTION_NGC   "ngc"
168 /* ---------------------------------------------------------------------------------- */
169 
170 OptDef ToolOptions[] =
171 {
172     { OPTION_FORMAT,    ALIAS_FORMAT,    NULL, format_usage,     1, true,   false },
173     { OPTION_OUTPUT_F,  ALIAS_OUTPUT_F,  NULL, outputf_usage,    1, true,   false },
174     { OPTION_OUTPUT_D,  ALIAS_OUTPUT_D,  NULL, outputd_usage,    1, true,   false },
175     { OPTION_BUFSIZE,   ALIAS_BUFSIZE,   NULL, bufsize_usage,    1, true,   false },
176     { OPTION_CURCACHE,  ALIAS_CURCACHE,  NULL, curcache_usage,   1, true,   false },
177     { OPTION_MEM,       ALIAS_MEM,       NULL, mem_usage,        1, true,   false },
178     { OPTION_TEMP,      ALIAS_TEMP,      NULL, temp_usage,       1, true,   false },
179     { OPTION_THREADS,   ALIAS_THREADS,   NULL, threads_usage,    1, true,   false },
180     { OPTION_PROGRESS,  ALIAS_PROGRESS,  NULL, progress_usage,   1, false,  false },
181     { OPTION_DETAILS,   ALIAS_DETAILS,   NULL, detail_usage,     1, false,  false },
182     { OPTION_SPLIT_SPOT,ALIAS_SPLIT_SPOT,NULL, split_spot_usage, 1, false,  false },
183     { OPTION_SPLIT_FILE,ALIAS_SPLIT_FILE,NULL, split_file_usage, 1, false,  false },
184     { OPTION_SPLIT_3,   ALIAS_SPLIT_3,   NULL, split_3_usage,    1, false,  false },
185     { OPTION_WHOLE_SPOT,    NULL,        NULL, whole_spot_usage, 1, false,  false },
186     { OPTION_STDOUT,    ALIAS_STDOUT,    NULL, stdout_usage,     1, false,  false },
187 /*    { OPTION_GZIP,      ALIAS_GZIP,      NULL, gzip_usage,       1, false,  false }, */
188 /*    { OPTION_BZIP2,     ALIAS_BZIP2,     NULL, bzip2_usage,      1, false,  false }, */
189 /*    { OPTION_MAXFD,     ALIAS_MAXFD,     NULL, maxfd_usage,      1, true,   false }, */
190     { OPTION_FORCE,     ALIAS_FORCE,     NULL, force_usage,      1, false,  false },
191     { OPTION_RIDN,      ALIAS_RIDN,      NULL, ridn_usage,       1, false,  false },
192     { OPTION_SKIP_TECH, NULL,            NULL, skip_tech_usage,  1, false,  false },
193     { OPTION_INCL_TECH, NULL,            NULL, incl_tech_usage,  1, false,  false },
194     { OPTION_PRNR,      ALIAS_PRNR,      NULL, print_read_nr,    1, false,  false },
195     { OPTION_MINRDLEN,  ALIAS_MINRDLEN,  NULL, min_rl_usage,     1, true,   false },
196     { OPTION_TABLE,     NULL,            NULL, table_usage,      1, true,   false },
197     { OPTION_STRICT,    NULL,            NULL, strict_usage,     1, false,  false },
198     { OPTION_BASE_FLT,  ALIAS_BASE_FLT,  NULL, base_flt_usage,   10, true,  false },
199     { OPTION_APPEND,    ALIAS_APPEND,    NULL, append_usage,     1, false,  false },
200     { OPTION_NGC,       NULL,            NULL, ngc_usage,        1, true,   false },
201 };
202 
203 /* ----------------------------------------------------------------------------------- */
204 
/* Name of the tool as shown by Usage() whenever the program name
   cannot be obtained from the parsed arguments. */
const char UsageDefaultName[] = "fasterq-dump";
206 
207 /* ----------------------------------------------------------------------------------- */
208 
UsageSummary(const char * progname)209 rc_t CC UsageSummary( const char * progname )
210 {
211     return KOutMsg( "\n"
212                      "Usage:\n"
213                      "  %s <path> [options]\n"
214                      "\n", progname );
215 }
216 
217 /* ----------------------------------------------------------------------------------- */
218 
Usage(const Args * args)219 rc_t CC Usage ( const Args * args )
220 {
221     rc_t rc;
222     uint32_t idx, count = ( sizeof ToolOptions ) / ( sizeof ToolOptions[ 0 ] );
223     const char * progname = UsageDefaultName;
224     const char * fullpath = UsageDefaultName;
225 
226     if ( NULL == args )
227     {
228         rc = RC( rcApp, rcArgv, rcAccessing, rcSelf, rcNull );
229     }
230     else
231     {
232         rc = ArgsProgram( args, &fullpath, &progname );
233     }
234 
235     if ( 0 != rc )
236     {
237         progname = fullpath = UsageDefaultName;
238     }
239 
240     UsageSummary( progname );
241 
242     KOutMsg( "Options:\n" );
243     for ( idx = 1; idx < count; ++idx ) /* start with 1, do not advertize row-range-option*/
244     {
245         const OptDef * opt = &ToolOptions[ idx ];
246         const char * param = NULL;
247 
248         assert( opt );
249         if ( 0 == strcmp( opt -> name, OPTION_NGC ) )
250         {
251             param = "PATH";
252         }
253         HelpOptionLine( opt -> aliases, opt -> name, param, opt -> help );
254     }
255 
256     KOutMsg("\n");
257     HelpOptionsStandard();
258     HelpVersion( fullpath, KAppVersion() );
259     return rc;
260 }
261 
262 /* -------------------------------------------------------------------------------------------- */
263 
264 #define DFLT_PATH_LEN 4096
265 
266 typedef struct tool_ctx_t
267 {
268     KDirectory * dir;
269     const VDBManager * vdb_mgr;     /* created, but unused to avoid race-condition in threads */
270 
271     const char * requested_temp_path;
272     const char * accession_path;
273     const char * accession_short;
274     const char * output_filename;
275     const char * output_dirname;
276     const char * seq_tbl_name;
277 
278     struct temp_dir * temp_dir; /* temp_dir.h */
279 
280     char lookup_filename[ DFLT_PATH_LEN ];
281     char index_filename[ DFLT_PATH_LEN ];
282     char dflt_output[ DFLT_PATH_LEN ];
283 
284     struct KFastDumpCleanupTask * cleanup_task; /* cleanup_task.h */
285 
286     size_t cursor_cache, buf_size, mem_limit;
287 
288     uint32_t num_threads /*, max_fds */;
289     uint64_t total_ram;
290 
291     format_t fmt; /* helper.h */
292 
293     compress_t compress; /* helper.h */
294 
295     bool force, show_progress, show_details, append, use_stdout;
296 
297     join_options join_options; /* helper.h */
298 } tool_ctx_t;
299 
300 /* taken form libs/kapp/main-priv.h */
301 rc_t KAppGetTotalRam ( uint64_t * totalRam );
302 
get_environment(tool_ctx_t * tool_ctx)303 static rc_t get_environment( tool_ctx_t * tool_ctx )
304 {
305     rc_t rc = KAppGetTotalRam ( &( tool_ctx -> total_ram ) );
306     if ( 0 != rc )
307     {
308         ErrMsg( "KAppGetTotalRam() -> %R", rc );
309     }
310     else
311     {
312         rc = KDirectoryNativeDir( &( tool_ctx -> dir ) );
313         if ( 0 != rc )
314         {
315             ErrMsg( "KDirectoryNativeDir() -> %R", rc );
316         }
317     }
318     return rc;
319 }
320 
show_details(tool_ctx_t * tool_ctx)321 static rc_t show_details( tool_ctx_t * tool_ctx )
322 {
323     rc_t rc = KOutMsg( "cursor-cache : %,ld bytes\n", tool_ctx -> cursor_cache );
324     if ( 0 == rc )
325     {
326         rc = KOutMsg( "buf-size     : %,ld bytes\n", tool_ctx -> buf_size );
327     }
328     if ( 0 == rc )
329     {
330         rc = KOutMsg( "mem-limit    : %,ld bytes\n", tool_ctx -> mem_limit );
331     }
332     if ( 0 == rc )
333     {
334         rc = KOutMsg( "threads      : %d\n", tool_ctx -> num_threads );
335     }
336     if ( 0 == rc )
337     {
338         rc = KOutMsg( "scratch-path : '%s'\n", get_temp_dir( tool_ctx -> temp_dir ) );
339     }
340     if ( 0 == rc )
341     {
342         rc = KOutMsg( "output-format: " );
343     }
344     if ( 0 == rc )
345     {
346         switch ( tool_ctx -> fmt )
347         {
348             case ft_special             : rc = KOutMsg( "SPECIAL\n" ); break;
349             case ft_whole_spot          : rc = KOutMsg( "FASTQ whole spot\n" ); break;
350             case ft_fastq_split_spot    : rc = KOutMsg( "FASTQ split spot\n" ); break;
351             case ft_fastq_split_file    : rc = KOutMsg( "FASTQ split file\n" ); break;
352             case ft_fastq_split_3       : rc = KOutMsg( "FASTQ split 3\n" ); break;
353             default                     : rc = KOutMsg( "unknow format\n" ); break;
354         }
355     }
356     if ( 0 == rc )
357     {
358         rc = KOutMsg( "output-file  : '%s'\n", tool_ctx -> output_filename );
359     }
360     if ( 0 == rc )
361     {
362         rc = KOutMsg( "output-dir   : '%s'\n", tool_ctx -> output_dirname );
363     }
364     if ( 0 == rc )
365     {
366         rc = KOutMsg( "append-mode  : '%s'\n", tool_ctx -> append ? "YES" : "NO" );
367     }
368     if ( 0 == rc )
369     {
370         rc = KOutMsg( "stdout-mode  : '%s'\n", tool_ctx -> append ? "YES" : "NO" );
371     }
372     return rc;
373 }
374 
375 static const char * dflt_seq_tabl_name = "SEQUENCE";
376 
377 #define DFLT_CUR_CACHE ( 5 * 1024 * 1024 )
378 #define DFLT_BUF_SIZE ( 1024 * 1024 )
379 #define DFLT_MEM_LIMIT ( 1024L * 1024 * 50 )
380 #define DFLT_NUM_THREADS 6
get_user_input(tool_ctx_t * tool_ctx,const Args * args)381 static void get_user_input( tool_ctx_t * tool_ctx, const Args * args )
382 {
383     bool split_spot, split_file, split_3, whole_spot;
384 
385 #if 0
386     tool_ctx -> compress = get_compress_t( get_bool_option( args, OPTION_GZIP ),
387                                             get_bool_option( args, OPTION_BZIP2 ) );
388 #endif
389     tool_ctx -> compress = ct_none;
390 
391     tool_ctx -> cursor_cache = get_size_t_option( args, OPTION_CURCACHE, DFLT_CUR_CACHE );
392     tool_ctx -> show_progress = get_bool_option( args, OPTION_PROGRESS );
393     tool_ctx -> show_details = get_bool_option( args, OPTION_DETAILS );
394     tool_ctx -> requested_temp_path = get_str_option( args, OPTION_TEMP, NULL );
395     tool_ctx -> force = get_bool_option( args, OPTION_FORCE );
396     tool_ctx -> output_filename = get_str_option( args, OPTION_OUTPUT_F, NULL );
397     tool_ctx -> output_dirname = get_str_option( args, OPTION_OUTPUT_D, NULL );
398     tool_ctx -> buf_size = get_size_t_option( args, OPTION_BUFSIZE, DFLT_BUF_SIZE );
399     tool_ctx -> mem_limit = get_size_t_option( args, OPTION_MEM, DFLT_MEM_LIMIT );
400     tool_ctx -> num_threads = get_uint32_t_option( args, OPTION_THREADS, DFLT_NUM_THREADS );
401 
402     tool_ctx -> join_options . rowid_as_name = get_bool_option( args, OPTION_RIDN );
403     tool_ctx -> join_options . skip_tech = !( get_bool_option( args, OPTION_INCL_TECH ) );
404     tool_ctx -> join_options . print_read_nr = get_bool_option( args, OPTION_PRNR );
405     tool_ctx -> join_options . print_name = true;
406     tool_ctx -> join_options . min_read_len = get_uint32_t_option( args, OPTION_MINRDLEN, 0 );
407     tool_ctx -> join_options . filter_bases = get_str_option( args, OPTION_BASE_FLT, NULL );
408 
409 #if 0
410     tool_ctx -> join_options . terminate_on_invalid = get_bool_option( args, OPTION_STRICT );
411 #else
412     tool_ctx -> join_options . terminate_on_invalid = true;
413 #endif
414 
415     split_spot = get_bool_option( args, OPTION_SPLIT_SPOT );
416     split_file = get_bool_option( args, OPTION_SPLIT_FILE );
417     split_3    = get_bool_option( args, OPTION_SPLIT_3 );
418     whole_spot = get_bool_option( args, OPTION_WHOLE_SPOT );
419 
420     tool_ctx -> fmt = get_format_t( get_str_option( args, OPTION_FORMAT, NULL ),
421                             split_spot, split_file, split_3, whole_spot ); /* helper.c */
422     if ( ft_fastq_split_3 == tool_ctx -> fmt )
423     {
424         tool_ctx -> join_options . skip_tech = true;
425     }
426 
427     tool_ctx -> seq_tbl_name = get_str_option( args, OPTION_TABLE, dflt_seq_tabl_name );
428     tool_ctx -> append = get_bool_option( args, OPTION_APPEND );
429     tool_ctx -> use_stdout = get_bool_option( args, OPTION_STDOUT );
430 
431     {
432         const char * ngc = get_str_option( args, OPTION_NGC, NULL );
433         if ( NULL != ngc )
434         {
435             KConfigSetNgcFile( ngc );
436         }
437     }
438 }
439 
440 #define DFLT_MAX_FD 32
441 #define MIN_NUM_THREADS 2
442 #define MIN_MEM_LIMIT ( 1024L * 1024 * 5 )
443 #define MAX_BUF_SIZE ( 1024L * 1024 * 1024 )
encforce_constrains(tool_ctx_t * tool_ctx)444 static void encforce_constrains( tool_ctx_t * tool_ctx )
445 {
446     if ( tool_ctx -> num_threads < MIN_NUM_THREADS )
447     {
448         tool_ctx -> num_threads = MIN_NUM_THREADS;
449     }
450 
451     if ( tool_ctx -> mem_limit < MIN_MEM_LIMIT )
452     {
453         tool_ctx -> mem_limit = MIN_MEM_LIMIT;
454     }
455 
456     if ( tool_ctx -> buf_size > MAX_BUF_SIZE )
457     {
458         tool_ctx -> buf_size = MAX_BUF_SIZE;
459     }
460 
461     if ( tool_ctx -> use_stdout )
462     {
463         switch( tool_ctx -> fmt )
464         {
465             case ft_unknown             : break;
466             case ft_special             : break;
467             case ft_whole_spot          : break;
468             case ft_fastq_split_spot    : break;
469             case ft_fastq_split_file    : tool_ctx -> use_stdout = false; break;
470             case ft_fastq_split_3       : tool_ctx -> use_stdout = false; break;
471         }
472     }
473 
474     if ( tool_ctx -> use_stdout )
475     {
476         tool_ctx -> compress = ct_none;
477         //tool_ctx -> show_progress = false;
478         tool_ctx -> force = false;
479         tool_ctx -> append = false;
480     }
481 }
482 
handle_accession(tool_ctx_t * tool_ctx)483 static rc_t handle_accession( tool_ctx_t * tool_ctx )
484 {
485     rc_t rc = 0;
486     tool_ctx -> accession_short = extract_acc2( tool_ctx -> accession_path ); /* helper.c */
487 
488     // in case something goes wrong with acc-extraction via VFS-manager
489     if ( NULL == tool_ctx -> accession_short )
490     {
491         tool_ctx -> accession_short = extract_acc( tool_ctx -> accession_path ); /* helper.c */
492     }
493 
494     if ( NULL == tool_ctx -> accession_short )
495     {
496         rc = RC( rcApp, rcArgv, rcAccessing, rcParam, rcInvalid );
497         ErrMsg( "accession '%s' invalid", tool_ctx -> accession_path );
498     }
499     return rc;
500 }
501 
handle_lookup_path(tool_ctx_t * tool_ctx)502 static rc_t handle_lookup_path( tool_ctx_t * tool_ctx )
503 {
504     rc_t rc = generate_lookup_filename( tool_ctx -> temp_dir,
505                                         &tool_ctx -> lookup_filename[ 0 ],
506                                         sizeof tool_ctx -> lookup_filename );
507     if ( 0 != rc )
508     {
509         ErrMsg( "fasterq-dump.c handle_lookup_path( lookup_filename ) -> %R", rc );
510     }
511     else
512     {
513         size_t num_writ;
514         /* generate the full path of the lookup-index-table */
515         rc = string_printf( &tool_ctx -> index_filename[ 0 ], sizeof tool_ctx -> index_filename,
516                             &num_writ,
517                             "%s.idx",
518                             &tool_ctx -> lookup_filename[ 0 ] );
519         if ( 0 != rc )
520         {
521             ErrMsg( "fasterq-dump.c handle_lookup_path( index_filename ) -> %R", rc );
522         }
523     }
524     return rc;
525 }
526 
527 /* we have NO output-dir and NO output-file */
make_output_filename_from_accession(tool_ctx_t * tool_ctx)528 static rc_t make_output_filename_from_accession( tool_ctx_t * tool_ctx )
529 {
530     /* we DO NOT have a output-directory : build output-filename from the accession */
531     /* generate the full path of the output-file, if not given */
532     size_t num_writ;
533     rc_t rc = string_printf( &tool_ctx -> dflt_output[ 0 ], sizeof tool_ctx -> dflt_output,
534                         &num_writ,
535                         "%s.fastq",
536                         tool_ctx -> accession_short );
537     if ( 0 != rc )
538     {
539         ErrMsg( "string_printf( output-filename ) -> %R", rc );
540     }
541     else
542     {
543         tool_ctx -> output_filename = tool_ctx -> dflt_output;
544     }
545     return rc;
546 }
547 
548 /* we have an output-dir and NO output-file */
make_output_filename_from_dir_and_accession(tool_ctx_t * tool_ctx)549 static rc_t make_output_filename_from_dir_and_accession( tool_ctx_t * tool_ctx )
550 {
551     size_t num_writ;
552     bool es = ends_in_slash( tool_ctx -> output_dirname ); /* helper.c */
553     rc_t rc = string_printf( tool_ctx -> dflt_output, sizeof tool_ctx -> dflt_output,
554                         &num_writ,
555                         es ? "%s%s.fastq" : "%s/%s.fastq",
556                         tool_ctx -> output_dirname,
557                         tool_ctx -> accession_short );
558     if ( 0 != rc )
559     {
560         ErrMsg( "string_printf( output-filename ) -> %R", rc );
561     }
562     else
563     {
564         tool_ctx -> output_filename = tool_ctx -> dflt_output;
565     }
566     return rc;
567 }
568 
optionally_create_paths_in_output_filename(tool_ctx_t * tool_ctx)569 static rc_t optionally_create_paths_in_output_filename( tool_ctx_t * tool_ctx )
570 {
571     rc_t rc = 0;
572     String path;
573     if ( extract_path( tool_ctx -> output_filename, &path ) )
574     {
575         /* the output-filename contains a path... */
576         if ( !dir_exists( tool_ctx -> dir, "%S", &path ) )
577         {
578             /* this path does not ( yet ) exist, create it... */
579             rc = create_this_dir( tool_ctx -> dir, &path, true );
580         }
581     }
582     return rc;
583 }
584 
adjust_output_filename(tool_ctx_t * tool_ctx)585 static rc_t adjust_output_filename( tool_ctx_t * tool_ctx )
586 {
587     rc_t rc = 0;
588     /* we do have a output-filename : use it */
589     if ( dir_exists( tool_ctx -> dir, "%s", tool_ctx -> output_filename ) ) /* helper.c */
590     {
591         /* the given output-filename is an existing directory ! */
592         rc = RC( rcVDB, rcNoTarg, rcConstructing, rcParam, rcInvalid );
593         ErrMsg( "string_printf( output-filename ) -> %R", rc );
594     }
595     else
596     {
597         rc = optionally_create_paths_in_output_filename( tool_ctx );
598     }
599     return rc;
600 }
601 
adjust_output_filename_by_dir(tool_ctx_t * tool_ctx)602 static rc_t adjust_output_filename_by_dir( tool_ctx_t * tool_ctx )
603 {
604     size_t num_writ;
605     bool es = ends_in_slash( tool_ctx -> output_dirname ); /* helper.c */
606     rc_t rc = string_printf( tool_ctx -> dflt_output, sizeof tool_ctx -> dflt_output,
607                         &num_writ,
608                         es ? "%s%s" : "%s/%s",
609                         tool_ctx -> output_dirname,
610                         tool_ctx -> output_filename );
611     if ( 0 != rc )
612     {
613         ErrMsg( "string_printf( output-filename ) -> %R", rc );
614     }
615     else
616     {
617         tool_ctx -> output_filename = tool_ctx -> dflt_output;
618         rc = optionally_create_paths_in_output_filename( tool_ctx );
619     }
620     return rc;
621 }
622 
populate_tool_ctx(tool_ctx_t * tool_ctx,const Args * args)623 static rc_t populate_tool_ctx( tool_ctx_t * tool_ctx, const Args * args )
624 {
625     rc_t rc = ArgsParamValue( args, 0, ( const void ** )&( tool_ctx -> accession_path ) );
626     if ( 0 != rc )
627     {
628         ErrMsg( "ArgsParamValue() -> %R", rc );
629     }
630     else
631     {
632         tool_ctx -> lookup_filename[ 0 ] = 0;
633         tool_ctx -> index_filename[ 0 ] = 0;
634         tool_ctx -> dflt_output[ 0 ] = 0;
635 
636         get_user_input( tool_ctx, args );
637         encforce_constrains( tool_ctx );
638         get_environment( tool_ctx );
639 
640         rc = make_temp_dir( &tool_ctx -> temp_dir,
641                           tool_ctx -> requested_temp_path,
642                           tool_ctx -> dir );
643     }
644 
645     if ( 0 == rc )
646     {
647         rc = handle_accession( tool_ctx );
648     }
649 
650     if ( 0 == rc )
651     {
652         rc = handle_lookup_path( tool_ctx );
653     }
654 
655     if ( 0 == rc && NULL != tool_ctx -> output_dirname )
656     {
657         if ( !dir_exists( tool_ctx -> dir, "%s", tool_ctx -> output_dirname ) )
658         {
659             rc = create_this_dir_2( tool_ctx -> dir, tool_ctx -> output_dirname, true );
660         }
661     }
662 
663     if ( rc == 0 )
664     {
665         if ( NULL == tool_ctx -> output_filename )
666         {
667             if ( NULL == tool_ctx -> output_dirname )
668             {
669                 rc = make_output_filename_from_accession( tool_ctx );
670             }
671             else
672             {
673                 rc = make_output_filename_from_dir_and_accession( tool_ctx );
674             }
675         }
676         else
677         {
678             if ( NULL == tool_ctx -> output_dirname )
679             {
680                 rc = adjust_output_filename( tool_ctx );
681             }
682             else
683             {
684                 rc = adjust_output_filename_by_dir( tool_ctx );
685             }
686         }
687     }
688 
689     if ( 0 == rc )
690     {
691         rc = Make_FastDump_Cleanup_Task ( &( tool_ctx -> cleanup_task ) ); /* cleanup_task.c */
692     }
693 
694     if ( 0 == rc )
695     {
696         rc = Add_Directory_to_Cleanup_Task ( tool_ctx -> cleanup_task,
697                 get_temp_dir( tool_ctx -> temp_dir ) );
698     }
699 
700     if ( 0 == rc )
701     {
702         rc = VDBManagerMakeRead( &( tool_ctx -> vdb_mgr ), tool_ctx -> dir );
703         if ( 0 != rc )
704         {
705             ErrMsg( "fasterq-dump.c populate_tool_ctx().VDBManagerMakeRead() -> %R\n", rc );
706         }
707     }
708     return rc;
709 }
710 
print_stats(const join_stats * stats)711 static rc_t print_stats( const join_stats * stats )
712 {
713     KOutHandlerSetStdErr();
714     rc_t rc = KOutMsg( "spots read      : %,lu\n", stats -> spots_read );
715     if ( 0 == rc )
716     {
717          rc = KOutMsg( "reads read      : %,lu\n", stats -> reads_read );
718     }
719     if ( 0 == rc )
720     {
721          rc = KOutMsg( "reads written   : %,lu\n", stats -> reads_written );
722     }
723     if ( 0 == rc && stats -> reads_zero_length > 0 )
724     {
725          rc = KOutMsg( "reads 0-length  : %,lu\n", stats -> reads_zero_length );
726     }
727     if ( 0 == rc && stats -> reads_technical > 0 )
728     {
729          rc = KOutMsg( "technical reads : %,lu\n", stats -> reads_technical );
730     }
731     if ( 0 == rc && stats -> reads_too_short > 0 )
732     {
733          rc = KOutMsg( "reads too short : %,lu\n", stats -> reads_too_short );
734     }
735     if ( 0 == rc && stats -> reads_invalid > 0 )
736     {
737          rc = KOutMsg( "reads invalid   : %,lu\n", stats -> reads_invalid );
738     }
739     KOutHandlerSetStdOut();
740     return rc;
741 }
742 
743 /* --------------------------------------------------------------------------------------------
744     produce special-output ( SPOT_ID,READ,SPOT_GROUP ) by iterating over the SEQUENCE - table:
745     produce fastq-output by iterating over the SEQUENCE - table:
746    --------------------------------------------------------------------------------------------
747    each thread iterates over a slice of the SEQUENCE-table
748    for each SPOT it may look up an entry in the lookup-table to get the READ
749    if it is not stored in the SEQ-tbl
750 -------------------------------------------------------------------------------------------- */
751 
752 static const uint32_t queue_timeout = 200;  /* ms */
753 
produce_lookup_files(tool_ctx_t * tool_ctx)754 static rc_t produce_lookup_files( tool_ctx_t * tool_ctx )
755 {
756     rc_t rc = 0;
757     struct bg_update * gap = NULL;
758     struct background_file_merger * bg_file_merger;
759     struct background_vector_merger * bg_vec_merger;
760 
761     if ( tool_ctx -> show_progress )
762     {
763         rc = bg_update_make( &gap, 0 );
764     }
765 
766     /* the background-file-merger catches the files produced by
767         the background-vector-merger */
768     if ( 0 == rc )
769     {
770         rc = make_background_file_merger( &bg_file_merger,
771                                 tool_ctx -> dir,
772                                 tool_ctx -> temp_dir,
773                                 tool_ctx -> cleanup_task,
774                                 &tool_ctx -> lookup_filename[ 0 ],
775                                 &tool_ctx -> index_filename[ 0 ],
776                                 tool_ctx -> num_threads,
777                                 queue_timeout,
778                                 tool_ctx -> buf_size,
779                                 gap ); /* merge_sorter.c */
780     }
781 
782     /* the background-vector-merger catches the KVectors produced by
783        the lookup-produceer */
784     if ( 0 == rc )
785     {
786         rc = make_background_vector_merger( &bg_vec_merger,
787                  tool_ctx -> dir,
788                  tool_ctx -> temp_dir,
789                  tool_ctx -> cleanup_task,
790                  bg_file_merger,
791                  tool_ctx -> num_threads,
792                  queue_timeout,
793                  tool_ctx -> buf_size,
794                  gap ); /* merge_sorter.c */
795     }
796 
/* --------------------------------------------------------------------------------------------
    produce the lookup-table by iterating over the PRIMARY_ALIGNMENT - table:
   --------------------------------------------------------------------------------------------
    reading SEQ_SPOT_ID, SEQ_READ_ID and RAW_READ
    SEQ_SPOT_ID and SEQ_READ_ID are merged into a 64-bit-key
    RAW_READ is read as 4na-unpacked ( Schema does not provide 4na-packed for this column )
    these key-pairs are temporarily stored in a KVector until a limit is reached
    after that limit is reached they are pushed to the background-vector-merger
    This KVector looks like this:
    content: [KEY][RAW_READ]
    KEY... 64-bit value as SEQ_SPOT_ID shifted left by 1 bit, zero-bit contains SEQ_READ_ID
    RAW_READ... 16-bit binary-chunk-length, followed by n bytes of packed 4na
-------------------------------------------------------------------------------------------- */
810     /* the lookup-producer is the source of the chain */
811     if ( 0 == rc )
812     {
813         rc = execute_lookup_production( tool_ctx -> dir,
814                                         tool_ctx -> vdb_mgr,
815                                         tool_ctx -> accession_short,
816                                         tool_ctx -> accession_path,
817                                         bg_vec_merger, /* drives the bg_file_merger */
818                                         tool_ctx -> cursor_cache,
819                                         tool_ctx -> buf_size,
820                                         tool_ctx -> mem_limit,
821                                         tool_ctx -> num_threads,
822                                         tool_ctx -> show_progress ); /* sorter.c */
823     }
824     bg_update_start( gap, "merge  : " ); /* progress_thread.c ...start showing the activity... */
825 
826     if ( 0 == rc )
827     {
828         rc = wait_for_and_release_background_vector_merger( bg_vec_merger ); /* merge_sorter.c */
829     }
830 
831     if ( 0 == rc )
832     {
833         rc = wait_for_and_release_background_file_merger( bg_file_merger ); /* merge_sorter.c */
834     }
835 
836     bg_update_release( gap );
837 
838     if ( 0 != rc )
839     {
840         ErrMsg( "fasterq-dump.c produce_lookup_files() -> %R", rc );
841     }
842 
843     return rc;
844 }
845 
846 
847 /* -------------------------------------------------------------------------------------------- */
848 
849 
produce_final_db_output(tool_ctx_t * tool_ctx)850 static rc_t produce_final_db_output( tool_ctx_t * tool_ctx )
851 {
852     struct temp_registry * registry = NULL;
853     join_stats stats;
854 
855     rc_t rc = make_temp_registry( &registry, tool_ctx -> cleanup_task ); /* temp_registry.c */
856 
857     clear_join_stats( &stats ); /* helper.c */
858     /* join SEQUENCE-table with lookup-table === this is the actual purpos of the tool === */
859 
860 /* --------------------------------------------------------------------------------------------
861     produce special-output ( SPOT_ID,READ,SPOT_GROUP ) by iterating over the SEQUENCE - table:
862     produce fastq-output by iterating over the SEQUENCE - table:
863    --------------------------------------------------------------------------------------------
864    each thread iterates over a slice of the SEQUENCE-table
865    for each SPOT it may look up an entry in the lookup-table to get the READ
866    if it is not stored in the SEQ-tbl
867 -------------------------------------------------------------------------------------------- */
868 
869     if ( rc == 0 )
870     {
871         rc = execute_db_join( tool_ctx -> dir,
872                            tool_ctx -> vdb_mgr,
873                            tool_ctx -> accession_path,
874                            tool_ctx -> accession_short,
875                            &stats,
876                            &tool_ctx -> lookup_filename[ 0 ],
877                            &tool_ctx -> index_filename[ 0 ],
878                            tool_ctx -> temp_dir,
879                            registry,
880                            tool_ctx -> cursor_cache,
881                            tool_ctx -> buf_size,
882                            tool_ctx -> num_threads,
883                            tool_ctx -> show_progress,
884                            tool_ctx -> fmt,
885                            & tool_ctx -> join_options ); /* join.c */
886     }
887 
888     /* from now on we do not need the lookup-file and it's index any more... */
889     if ( 0 != tool_ctx -> lookup_filename[ 0 ] )
890     {
891         KDirectoryRemove( tool_ctx -> dir, true, "%s", &tool_ctx -> lookup_filename[ 0 ] );
892     }
893 
894     if ( 0 != tool_ctx -> index_filename[ 0 ] )
895     {
896         KDirectoryRemove( tool_ctx -> dir, true, "%s", &tool_ctx -> index_filename[ 0 ] );
897     }
898 
899     /* STEP 4 : concatenate output-chunks */
900     if ( 0 == rc )
901     {
902         if ( tool_ctx -> use_stdout )
903         {
904             rc = temp_registry_to_stdout( registry,
905                                           tool_ctx -> dir,
906                                           tool_ctx -> buf_size ); /* temp_registry.c */
907         }
908         else
909         {
910             rc = temp_registry_merge( registry,
911                               tool_ctx -> dir,
912                               tool_ctx -> output_filename,
913                               tool_ctx -> buf_size,
914                               tool_ctx -> show_progress,
915                               tool_ctx -> force,
916                               tool_ctx -> compress,
917                               tool_ctx -> append ); /* temp_registry.c */
918         }
919     }
920 
921     /* in case some of the partial results have not been deleted be the concatenator */
922     if ( NULL != registry )
923     {
924         destroy_temp_registry( registry ); /* temp_registry.c */
925     }
926 
927     if ( 0 == rc )
928     {
929         print_stats( &stats ); /* above */
930     }
931 
932     return rc;
933 }
934 
935 /* -------------------------------------------------------------------------------------------- */
936 
/* returns true if a file named like the output-file ( without any index inserted )
   is reported as existing by file_exists() in the directory of the tool-context */
static bool output_exists_whole( tool_ctx_t * tool_ctx )
{
    bool found = file_exists( tool_ctx -> dir, "%s", tool_ctx -> output_filename ); /* helper.c */
    return found;
}
941 
/* returns true if the output-file with the given index inserted into its name exists

   tool_ctx ... supplies the directory and the name of the output-file
   idx ........ the index to be inserted into the filename

   if the indexed filename cannot be produced, false is returned
   NOTE(review): 4096 is handed to split_filename_insert_idx() as its 2nd argument,
   presumably the capacity of the buffer - confirm in helper.c */
static bool output_exists_idx( tool_ctx_t * tool_ctx, uint32_t idx )
{
    SBuffer indexed_name;
    bool found;

    if ( 0 != split_filename_insert_idx( &indexed_name, 4096,
                            tool_ctx -> output_filename, idx ) ) /* helper.c */
    {
        /* nothing has to be released if the name could not be made */
        return false;
    }

    found = file_exists( tool_ctx -> dir, "%S", &( indexed_name . S ) ); /* helper.c */
    release_SBuffer( &indexed_name ); /* helper.c */
    return found;
}
955 
/* returns true if any of the files a split-output may touch does already exist:
   the plain output-file, or the output-file with index 1 or index 2 inserted
   the checks are made in this order and stop at the first existing file */
static bool output_exists_split( tool_ctx_t * tool_ctx )
{
    return output_exists_whole( tool_ctx )
        || output_exists_idx( tool_ctx, 1 )
        || output_exists_idx( tool_ctx, 2 );
}
969 
check_output_exits(tool_ctx_t * tool_ctx)970 static rc_t check_output_exits( tool_ctx_t * tool_ctx )
971 {
972     rc_t rc = 0;
973     /* check if the output-file(s) do already exist, in case we are not overwriting */
974     if ( !( tool_ctx -> force ) && !( tool_ctx -> append ) )
975     {
976         bool exists = false;
977         switch( tool_ctx -> fmt )
978         {
979             case ft_unknown             : break;
980             case ft_special             : exists = output_exists_whole( tool_ctx ); break;
981             case ft_whole_spot          : exists = output_exists_whole( tool_ctx ); break;
982             case ft_fastq_split_spot    : exists = output_exists_whole( tool_ctx ); break;
983             case ft_fastq_split_file    : exists = output_exists_split( tool_ctx ); break;
984             case ft_fastq_split_3       : exists = output_exists_split( tool_ctx ); break;
985         }
986         if ( exists )
987         {
988             rc = RC( rcExe, rcFile, rcPacking, rcName, rcExists );
989             ErrMsg( "fasterq-dump.c fastdump_csra() checking ouput-file '%s' -> %R",
990                      tool_ctx -> output_filename, rc );
991         }
992     }
993     return rc;
994 }
995 
fastdump_csra(tool_ctx_t * tool_ctx)996 static rc_t fastdump_csra( tool_ctx_t * tool_ctx )
997 {
998     rc_t rc = 0;
999 
1000     if ( tool_ctx -> show_details )
1001     {
1002         rc = show_details( tool_ctx ); /* above */
1003     }
1004 
1005     if ( 0 == rc )
1006     {
1007         rc = check_output_exits( tool_ctx ); /* above */
1008     }
1009 
1010     if ( 0 == rc )
1011     {
1012         rc = produce_lookup_files( tool_ctx ); /* above */
1013     }
1014 
1015     if ( 0 == rc )
1016     {
1017         rc = produce_final_db_output( tool_ctx ); /* above */
1018     }
1019     return rc;
1020 }
1021 
1022 
1023 /* -------------------------------------------------------------------------------------------- */
1024 
fastdump_table(tool_ctx_t * tool_ctx,const char * tbl_name)1025 static rc_t fastdump_table( tool_ctx_t * tool_ctx, const char * tbl_name )
1026 {
1027     rc_t rc = 0;
1028     struct temp_registry * registry = NULL;
1029     join_stats stats;
1030 
1031     clear_join_stats( &stats ); /* helper.c */
1032 
1033     if ( tool_ctx -> show_details )
1034     {
1035         rc = show_details( tool_ctx ); /* above */
1036     }
1037 
1038     if ( 0 == rc )
1039     {
1040         rc = check_output_exits( tool_ctx ); /* above */
1041     }
1042 
1043     if ( 0 == rc )
1044     {
1045         rc = make_temp_registry( &registry, tool_ctx -> cleanup_task ); /* temp_registry.c */
1046     }
1047 
1048     if ( 0 == rc )
1049     {
1050         rc = execute_tbl_join( tool_ctx -> dir,
1051                            tool_ctx -> vdb_mgr,
1052                            tool_ctx -> accession_short,
1053                            tool_ctx -> accession_path,
1054                            &stats,
1055                            tbl_name,
1056                            tool_ctx -> temp_dir,
1057                            registry,
1058                            tool_ctx -> cursor_cache,
1059                            tool_ctx -> buf_size,
1060                            tool_ctx -> num_threads,
1061                            tool_ctx -> show_progress,
1062                            tool_ctx -> fmt,
1063                            & tool_ctx -> join_options ); /* tbl_join.c */
1064     }
1065 
1066     if ( 0 == rc )
1067     {
1068         if ( tool_ctx -> use_stdout )
1069         {
1070             rc = temp_registry_to_stdout( registry,
1071                                           tool_ctx -> dir,
1072                                           tool_ctx -> buf_size ); /* temp_registry.c */
1073         }
1074         else
1075         {
1076             rc = temp_registry_merge( registry,
1077                               tool_ctx -> dir,
1078                               tool_ctx -> output_filename,
1079                               tool_ctx -> buf_size,
1080                               tool_ctx -> show_progress,
1081                               tool_ctx -> force,
1082                               tool_ctx -> compress,
1083                               tool_ctx -> append ); /* temp_registry.c */
1084         }
1085     }
1086 
1087     if ( NULL != registry )
1088     {
1089         destroy_temp_registry( registry ); /* temp_registry.c */
1090     }
1091 
1092     if ( 0 == rc )
1093     {
1094         print_stats( &stats ); /* above */
1095     }
1096 
1097     return rc;
1098 }
1099 
1100 static const char * consensus_table = "CONSENSUS";
1101 
get_db_seq_tbl_name(tool_ctx_t * tool_ctx)1102 static const char * get_db_seq_tbl_name( tool_ctx_t * tool_ctx )
1103 {
1104     const char * res = tool_ctx -> seq_tbl_name;
1105     VNamelist * tables = cmn_get_table_names( tool_ctx -> dir, tool_ctx -> vdb_mgr,
1106                                               tool_ctx -> accession_short,
1107                                               tool_ctx -> accession_path ); /* cmn_iter.c */
1108     if ( NULL != tables )
1109     {
1110         int32_t idx;
1111         rc_t rc = VNamelistContainsStr( tables, consensus_table, &idx );
1112         if ( 0 == rc && idx > -1 )
1113         {
1114             res = consensus_table;
1115         }
1116         VNamelistRelease ( tables );
1117     }
1118     return res;
1119 }
1120 
1121 /* -------------------------------------------------------------------------------------------- */
1122 
perform_tool(tool_ctx_t * tool_ctx)1123 static rc_t perform_tool( tool_ctx_t * tool_ctx )
1124 {
1125     acc_type_t acc_type; /* cmn_iter.h */
1126     rc_t rc = cmn_get_acc_type( tool_ctx -> dir, tool_ctx -> vdb_mgr,
1127                                 tool_ctx -> accession_short, tool_ctx -> accession_path,
1128                                 &acc_type ); /* cmn_iter.c */
1129     if ( 0 == rc )
1130     {
1131         /* =================================================== */
1132         switch( acc_type )
1133         {
1134             case acc_csra       : rc = fastdump_csra( tool_ctx ); /* above */
1135                                   break;
1136 
1137             case acc_pacbio     : ErrMsg( "accession '%s' is PACBIO, please use fastq-dump instead", tool_ctx -> accession_path );
1138                                   rc = 3; /* signal to main() that the accession is not-processed */
1139                                   break;
1140 
1141             case acc_sra_flat   : rc = fastdump_table( tool_ctx, NULL ); /* above */
1142                                   break;
1143 
1144             case acc_sra_db     : rc = fastdump_table( tool_ctx, get_db_seq_tbl_name( tool_ctx ) ); /* above */
1145                                   break;
1146 
1147             default             : ErrMsg( "invalid accession '%s'", tool_ctx -> accession_path );
1148                                   rc = 3; /* signal to main() that the accession is not-found/invalid */
1149                                   break;
1150         }
1151         /* =================================================== */
1152     }
1153     else
1154     {
1155         ErrMsg( "invalid accession '%s'", tool_ctx -> accession_path );
1156     }
1157 
1158     return rc;
1159 }
1160 
1161 /* -------------------------------------------------------------------------------------------- */
1162 
KMain(int argc,char * argv[])1163 rc_t CC KMain ( int argc, char *argv [] )
1164 {
1165     Args * args;
1166     uint32_t num_options = sizeof ToolOptions / sizeof ToolOptions [ 0 ];
1167 
1168     rc_t rc = ArgsMakeAndHandle ( &args, argc, argv, 1, ToolOptions, num_options );
1169     if ( 0 != rc )
1170     {
1171         ErrMsg( "ArgsMakeAndHandle() -> %R", rc );
1172     }
1173     else
1174     {
1175         uint32_t param_count;
1176         rc = ArgsParamCount( args, &param_count );
1177         if ( 0 != rc )
1178         {
1179             ErrMsg( "ArgsParamCount() -> %R", rc );
1180         }
1181         else
1182         {
1183             /* in case we are given no or more than one accessions/files to process */
1184             if ( param_count == 0 || param_count > 1 )
1185             {
1186                 Usage ( args );
1187                 /* will make the caller of this function aka KMane() in man.c return
1188                 error code of 3 */
1189                 rc = 3;
1190             }
1191             else
1192             {
1193                 tool_ctx_t tool_ctx;
1194                 rc = populate_tool_ctx( &tool_ctx, args ); /* above */
1195                 if ( 0 == rc )
1196                 {
1197                     rc = perform_tool( &tool_ctx );     /* above */
1198 
1199                     {
1200                         rc_t rc2 = KDirectoryRelease( tool_ctx . dir );
1201                         if ( 0 != rc2 )
1202                         {
1203                             ErrMsg( "KDirectoryRelease() -> %R", rc2 );
1204                             rc = ( 0 == rc ) ? rc2 : rc;
1205                         }
1206                     }
1207                     destroy_temp_dir( tool_ctx . temp_dir ); /* temp_dir.c */
1208                     {
1209                         rc_t rc2 = VDBManagerRelease( tool_ctx . vdb_mgr );
1210                         if ( 0 != rc2 )
1211                         {
1212                             ErrMsg( "VDBManagerRelease() -> %R", rc2 );
1213                             rc = ( 0 == rc ) ? rc2 : rc;
1214                         }
1215                     }
1216                 }
1217                 if ( NULL != tool_ctx . accession_short )
1218                 {
1219                     free( ( char * )tool_ctx . accession_short );
1220                 }
1221             }
1222         }
1223     }
1224     return rc;
1225 }
1226