1 /*===========================================================================
2 *
3 * PUBLIC DOMAIN NOTICE
4 * National Center for Biotechnology Information
5 *
6 * This software/database is a "United States Government Work" under the
7 * terms of the United States Copyright Act. It was written as part of
8 * the author's official duties as a United States Government employee and
9 * thus cannot be copyrighted. This software/database is freely available
10 * to the public for use. The National Library of Medicine and the U.S.
11 * Government have not placed any restriction on its use or reproduction.
12 *
13 * Although all reasonable efforts have been taken to ensure the accuracy
14 * and reliability of the software and data, the NLM and the U.S.
15 * Government do not and cannot warrant the performance or results that
16 * may be obtained by using this software or data. The NLM and the U.S.
17 * Government disclaim all warranties, express or implied, including
18 * warranties of performance, merchantability or fitness for any particular
19 * purpose.
20 *
21 * Please cite the author in any work or product based on this material.
22 *
23 * ===========================================================================
24 *
25 */
26
27 #include <kapp/main.h>
28 #include <kapp/args.h>
29 #include <klib/text.h>
30 #include <klib/log.h>
31 #include <klib/out.h>
32 #include <klib/status.h>
33 #include <klib/rc.h>
34 #include <klib/printf.h>
35 #include <kfs/directory.h>
36 #include <kfs/file.h>
37 #include <kapp/log-xml.h>
38 #include <align/writer-refseq.h>
39
40 #include <stdlib.h>
41 #include <stdio.h>
42 #include <string.h>
43 #include <ctype.h>
44 #include <assert.h>
45 #include <limits.h>
46
47 #include "Globals.h"
48 #include "loader-imp.h"
49
50 /*: ARGS
51 Summary:
52 Load a BAM formatted data file
53
54 Usage:
55 --help display this text and quit
56 --version display the version string and quit
57 [global-options] [options] <file...> [ --remap [options] <file>... ]...
58
59 //:global-options
60 Global Options:
61
* options affecting logging
63 log-level <level> logging level values: [fatal|sys|int|err|warn|info|0-5] default: info
64 xml-log <filename> produce an XML-formatted log file
65
* options affecting performance optimisation
tmpfs <directory> where to store temporary files, default: '/tmp'
cache-size <mbytes> the limit in MB for temporary files
69
* options affecting error limits
71 max-err-count <number> the maximum number of errors to ignore
72 max-warning-dup-flag <count> the limit for number of duplicate flag mismatch warnings
73
74 //:options
75 Options:
76 output <name> name of the output, required
77 config <file> reference configuration file (See Configuration)
78 header <file> file containing the SAM header
79 remap special option to enable processing sets of remapped files. remap MUST be given between each set, all regular options can be respecified, in fact the output must be unique for each set. This is for when a set of reads are aligned multiple times, for example against different reference builds or with different aligners. This mode ensures that spot ids are the same across the several outputs.
80
81 Debugging Options:
82 only-verify exit after verifying existence of references
83 max-rec-count <number> exit after processing this many records (per file)
84 nomatch-log <path> log alignments with no matching bases
85
86 Filtering Options:
87 minimum-match <number> minimum number of matches for an alignment
88 no-secondary ignore alignments marked as secondary
89 accept-dups accept spots with inconsistent PCR duplicate flags
90 accept-nomatch accept alignments with no matching bases
91 ref-config limit processing to references in the config file, ignoring all others
92 ref-filter <name> limit processing to the given reference, ignoring all others
93 min-mapq <number> filter secondary alignments by minimum mapping quality
94
95 Rare or Esoteric Options:
96 input <directory> where to find fasta files
97 ref-file <file> fasta file with references
98 unsorted expect unsorted input (requires more memory)
99 sorted require sorted input
100 TI look for trace id optional tag
101 unaligned <file> file without aligned reads
102
103 Deprecated Options:
104 use-OQ use OQ option column for quality values instead of QUAL
105 no-verify skip verify existence of references from the BAM file
106 accept-hard-clip allow hard clipping in CIGAR
107 allow-multi-map allow the same reference to be mapped to multiple names in the input files
108 edit-aligned-qual <number> convert quality at aligned positions to this value
109 cs turn on awareness of colorspace
110 qual-quant quality scores quantization level
keep-mismatch-qual don't quantize quality at mismatched positions
112
113
114 Example:
115 bam-load -o /tmp/SRZ123456 -k analysis.bam.cfg 123456.bam
116 */
117
118 /* MARK: Arguments and Usage */
/*
 * Long option names, their OPTION_* shorthands and (where one exists) the
 * single-letter alias, kept together per option.
 */

/* --- locations of inputs, outputs and scratch space --- */
static const char option_input[] = "input";
#define OPTION_INPUT option_input
#define ALIAS_INPUT "i"

static const char option_output[] = "output";
#define OPTION_OUTPUT option_output
#define ALIAS_OUTPUT "o"

static const char option_tmpfs[] = "tmpfs";
#define OPTION_TMPFS option_tmpfs
#define ALIAS_TMPFS "t"

static const char option_config[] = "config";
#define OPTION_CONFIG option_config
#define ALIAS_CONFIG "k"

static const char option_header[] = "header";
#define OPTION_HEADER option_header

static const char option_ref_file[] = "ref-file";
#define OPTION_REF_FILE option_ref_file
#define ALIAS_REF_FILE "r"

static const char option_unaligned[] = "unaligned";
#define OPTION_UNALIGNED option_unaligned
#define ALIAS_UNALIGNED "u"

static const char option_nomatch_log[] = "nomatch-log";
#define OPTION_NOMATCH_LOG option_nomatch_log

/* --- quality handling --- */
static const char option_min_mapq[] = "min-mapq";
#define OPTION_MINMAPQ option_min_mapq
#define ALIAS_MINMAPQ "q"

static const char option_qual_compress[] = "qual-quant";
#define OPTION_QCOMP option_qual_compress
#define ALIAS_QCOMP "Q"

static const char option_use_qual[] = "use-QUAL";
static const char option_edit_aligned_qual[] = "edit-aligned-qual";
static const char option_keep_mismatch_qual[] = "keep-mismatch-qual";

/* --- limits and resources --- */
static const char option_cache_size[] = "cache-size";
#define OPTION_CACHE_SIZE option_cache_size

static const char option_max_err_count[] = "max-err-count";
#define OPTION_MAX_ERR_COUNT option_max_err_count
#define ALIAS_MAX_ERR_COUNT "E"

static const char option_max_rec_count[] = "max-rec-count";
#define OPTION_MAX_REC_COUNT option_max_rec_count

static const char option_max_warn_dup_flag[] = "max-warning-dup-flag";
#define OPTION_MAX_WARN_DUP_FLAG option_max_warn_dup_flag

static const char option_min_match[] = "minimum-match";
#define OPTION_MIN_MATCH option_min_match

/* --- reference verification and selection --- */
static const char option_no_verify[] = "no-verify";
static const char option_only_verify[] = "only-verify";
static const char option_ref_filter[] = "ref-filter";
static const char option_ref_config[] = "ref-config";

static const char option_allow_multi_map[] = "allow-multi-map";
#define OPTION_ALLOW_MULTI_MAP option_allow_multi_map

/* --- input ordering --- */
static const char option_unsorted[] = "unsorted";
static const char option_sorted[] = "sorted";

/* --- what to accept from the input --- */
static const char option_accept_dup[] = "accept-dups";
#define OPTION_ACCEPT_DUP option_accept_dup
#define ALIAS_ACCEPT_DUP "d"

static const char option_accept_nomatch[] = "accept-nomatch";
#define OPTION_ACCEPT_NOMATCH option_accept_nomatch

static const char option_accept_hard_clip[] = "accept-hard-clip";
#define OPTION_ACCEPT_HARD_CLIP option_accept_hard_clip

static const char option_no_cs[] = "no-cs";
#define OPTION_NO_CS option_no_cs

static const char option_TI[] = "TI";
#define OPTION_TI option_TI

/* --- secondary alignments --- */
static const char option_no_secondary[] = "no-secondary";
#define OPTION_NO_SECONDARY option_no_secondary
#define ALIAS_NO_SECONDARY "P"

static const char option_allow_secondary[] = "make-spots-with-secondary";
#define OPTION_ALLOW_SECONDARY option_allow_secondary

static const char option_defer_secondary[] = "defer-secondary";
#define OPTION_DEFER_SECONDARY option_defer_secondary
189
/*
 * Help text for every command line option.  Each array is a NULL-terminated
 * list of lines; the arrays are referenced from the Options table.
 * Long lines are split with string-literal concatenation only, so the text
 * seen at run time is unchanged.
 */
static char const *input_usage[] = { "Path where to get fasta files from.", NULL };

static char const *output_usage[] = { "Path and Name of the output database.", NULL };

static char const *tmpfs_usage[] = { "Path to be used for scratch files.", NULL };

static char const *config_usage[] = {
    "Path to configuration file:",
    "maps the input BAM file's reference names to the equivalent GenBank accession.",
    "It is a tab-delimited text file with unix line endings (\\n - LF) "
        "with the following fields in this order:",
    "#1 reference name as it occurs in the BAM file's SN field of @SQ header record;",
    "#2 INSDC reference ID",
    NULL
};

static char const *min_mapq_usage[] = { "Filter secondary alignments by minimum mapping quality.", NULL };

static char const *qcomp_usage[] = {
    "Quality scores quantization level, can be a number (0: none, 1: 2bit, 2: 1bit), "
        "or a string like '1:10,10:20,20:30,30:-' (which is equivalent to 1) "
        "(nb. the endpoint is exclusive).",
    NULL
};

static char const *unsorted_usage[] = { "Tell the loader to expect unsorted input (requires more memory)", NULL };

static char const *sorted_usage[] = { "Tell the loader to require sorted input", NULL };

static char const *cache_size_usage[] = { "Set the cache size in MB for the temporary indices", NULL };

static char const *mrc_usage[] = { "Set the maximum number of records to process from the BAM file", NULL };

static char const *mec_usage[] = { "Set the maximum number of errors to ignore from the BAM file", NULL };

static char const *no_verify_usage[] = { "Skip verify existence of references from the BAM file", NULL };

static char const *only_verify_usage[] = { "Exit after verifying existence of references from the BAM file", NULL };

static char const *use_QUAL_usage[] = {
    "use QUAL column for quality values (default is to use OQ if it is available)",
    NULL
};

static char const *use_ref_filter[] = { "Only process alignments to the given reference", NULL };

static char const *use_ref_config[] = { "Only process alignments to references in the config file", NULL };

static char const *use_edit_aligned_qual[] = { "Convert quality at aligned positions to this value", NULL };

static char const *use_keep_mismatch_qual[] = { "Don't quantized quality at mismatched positions", NULL };

static char const *use_unaligned[] = { "Specify file without aligned reads", NULL };

static char const *use_accept_dups[] = { "Accept spots inconsistent PCR duplicate flags", NULL };

static char const *use_accept_nomatch[] = { "Accept alignments with no matching bases", NULL };

static char const *use_nomatch_log[] = { "Where to write info for alignments with no matching bases", NULL };

static char const *use_min_match[] = { "minimum number of matches for an alignment", NULL };

static char const *use_header[] = {
    "path to a file containing the SAM header to store in the resulting cSRA, "
        "recommended in case of multiple input BAMs",
    NULL
};

static char const *use_no_cs[] = { "turn off awareness of colorspace", NULL };

static char const *use_no_secondary[] = { "ignore alignments marked as secondary", NULL };

static char const *use_ref_file[] = { "path to a fasta file with references", NULL };

static char const *use_TI[] = { "for trace alignments", NULL };

static char const *use_max_dup_warnings[] = { "set limit for number of duplicate flag mismatch warnings", NULL };

static char const *use_accept_hard_clip[] = { "accept hard clipping in CIGAR", NULL };

static char const *use_allow_multi_map[] = {
    "allow the same reference to be mapped to multiple names in the input files",
    "(default is disallow, old behaviors was to allow it)",
    NULL
};

static char const *use_allow_secondary[] = { "use secondary alignments for constructing spots", NULL };

static char const *use_defer_secondary[] = {
    "defer processing of secondary alignments until the end of the file",
    NULL
};
425
426 OptDef Options[] =
427 {
428 /* order here is same as in param array below!!! */
429 { OPTION_INPUT, ALIAS_INPUT, NULL, input_usage, 1, true, false },
430 { OPTION_OUTPUT, ALIAS_OUTPUT, NULL, output_usage, 1, true, true },
431 { OPTION_CONFIG, ALIAS_CONFIG, NULL, config_usage, 1, true, false },
432 { OPTION_HEADER, NULL, NULL, use_header, 1, true, false },
433 { OPTION_TMPFS, ALIAS_TMPFS, NULL, tmpfs_usage, 1, true, false },
434 { OPTION_UNALIGNED, ALIAS_UNALIGNED, NULL, use_unaligned, 256, true, false },
435 { OPTION_ACCEPT_DUP, ALIAS_ACCEPT_DUP, NULL, use_accept_dups, 1, false, false },
436 { OPTION_ACCEPT_NOMATCH, NULL, NULL, use_accept_nomatch, 1, false, false },
437 { OPTION_NOMATCH_LOG, NULL, NULL, use_nomatch_log, 1, true, false },
438 { OPTION_QCOMP, ALIAS_QCOMP, NULL, qcomp_usage, 1, true, false },
439 { OPTION_MINMAPQ, ALIAS_MINMAPQ, NULL, min_mapq_usage, 1, true, false },
440 { OPTION_CACHE_SIZE, NULL, NULL, cache_size_usage, 1, true, false },
441 { OPTION_NO_CS, NULL, NULL, use_no_cs, 1, false, false },
442 { OPTION_MIN_MATCH, NULL, NULL, use_min_match, 1, true, false },
443 { OPTION_NO_SECONDARY, ALIAS_NO_SECONDARY, NULL, use_no_secondary, 1, false, false },
444 { option_unsorted, NULL, NULL, unsorted_usage, 1, false, false },
445 { option_sorted, NULL, NULL, sorted_usage, 1, false, false },
446 { option_no_verify, NULL, NULL, no_verify_usage, 1, false, false },
447 { option_only_verify, NULL, NULL, only_verify_usage, 1, false, false },
448 { option_use_qual, NULL, NULL, use_QUAL_usage, 1, false, false },
449 { option_ref_config, NULL, NULL, use_ref_config, 1, false, false },
450 { option_ref_filter, NULL, NULL, use_ref_filter, 1, true, false },
451 { option_edit_aligned_qual, NULL, NULL, use_edit_aligned_qual, 1, true, false },
452 { option_keep_mismatch_qual, NULL, NULL, use_keep_mismatch_qual, 1, false, false },
453 { OPTION_MAX_REC_COUNT, NULL, NULL, mrc_usage, 1, true, false },
454 { OPTION_MAX_ERR_COUNT, ALIAS_MAX_ERR_COUNT, NULL, mec_usage, 1, true, false },
455 { OPTION_REF_FILE, ALIAS_REF_FILE, NULL, use_ref_file, 0, true, false },
456 { OPTION_TI, NULL, NULL, use_TI, 1, false, false },
457 { OPTION_MAX_WARN_DUP_FLAG, NULL, NULL, use_max_dup_warnings, 1, true, false },
458 { OPTION_ACCEPT_HARD_CLIP, NULL, NULL, use_accept_hard_clip, 1, false, false },
459 { OPTION_ALLOW_MULTI_MAP, NULL, NULL, use_allow_multi_map, 1, false, false },
460 { OPTION_ALLOW_SECONDARY, NULL, NULL, use_allow_secondary, 1, false, false },
461 { OPTION_DEFER_SECONDARY, NULL, NULL, use_defer_secondary, 1, false, false }
462 };
463
/*
 * Placeholder shown in the help output for the value of each option, or NULL
 * for options that are plain switches.
 *
 * Entry i describes Options[i]; keep both arrays in the same order.
 */
const char* OptHelpParam[] =
{
    /* input              */ "path",
    /* output             */ "path",
    /* config             */ "path-to-file",
    /* header             */ "path-to-file",
    /* tmpfs              */ "path",
    /* unaligned          */ "path-to-file",
    /* accept-dups        */ NULL,
    /* accept-nomatch     */ NULL,
    /* nomatch-log        */ "path-to-file",
    /* qual-quant         */ "level",
    /* min-mapq           */ "phred-score",
    /* cache-size         */ "mbytes",
    /* no-cs              */ NULL,
    /* minimum-match      */ "count",
    /* no-secondary       */ NULL,
    /* unsorted           */ NULL,
    /* sorted             */ NULL,
    /* no-verify          */ NULL,
    /* only-verify        */ NULL,
    /* use-QUAL           */ NULL,
    /* ref-config         */ NULL,
    /* ref-filter         */ "name",
    /* edit-aligned-qual  */ "new-value",
    /* keep-mismatch-qual */ NULL,
    /* max-rec-count      */ "number",
    /* max-err-count      */ "number",
    /* ref-file           */ "path-to-file",
    /* TI                 */ NULL,
    /* max-warning-dup-flag */ "count",
    /* accept-hard-clip   */ NULL,
    /* allow-multi-map    */ NULL,
    /* make-spots-with-secondary */ NULL,
    /* defer-secondary    */ NULL
};
501
UsageSummary(char const * progname)502 rc_t UsageSummary (char const * progname)
503 {
504 return KOutMsg (
505 "Usage:\n"
506 "\t%s [options] <bam-file>\n"
507 "\n"
508 "Summary:\n"
509 "\tLoad a BAM formatted data file\n"
510 "\n"
511 "Example:\n"
512 "\t%s -o /tmp/SRZ123456 -k analysis.bam.cfg 123456.bam\n"
513 "\n"
514 ,progname, progname);
515 }
516
517 char const UsageDefaultName[] = "bam-load";
518
Usage(const Args * args)519 rc_t CC Usage (const Args * args)
520 {
521 rc_t rc;
522 int i;
523 const char * progname = UsageDefaultName;
524 const char * fullpath = UsageDefaultName;
525 const size_t argsQty = sizeof(Options) / sizeof(Options[0]);
526
527 if (args == NULL)
528 rc = RC (rcApp, rcArgv, rcAccessing, rcSelf, rcNull);
529 else
530 rc = ArgsProgram (args, &fullpath, &progname);
531 if (rc)
532 progname = fullpath = UsageDefaultName;
533
534 UsageSummary (progname);
535
536 for(i = 0; i < argsQty; i++ ) {
537 if( Options[i].required && Options[i].help[0] != NULL ) {
538 HelpOptionLine(Options[i].aliases, Options[i].name, OptHelpParam[i], Options[i].help);
539 }
540 }
541 OUTMSG(("\nOptions:\n"));
542 for(i = 0; i < argsQty; i++ ) {
543 if( !Options[i].required && Options[i].help[0] != NULL ) {
544 HelpOptionLine(Options[i].aliases, Options[i].name, OptHelpParam[i], Options[i].help);
545 }
546 }
547 XMLLogger_Usage();
548 OUTMSG(("\n"));
549 HelpOptionsStandard ();
550 HelpVersion (fullpath, KAppVersion());
551 return rc;
552 }
553
554 /* MARK: Definitions and Globals */
555
556 #define SCHEMAFILE "align/align.vschema"
557
558 Globals G;
559
560 #ifdef _WIN32
561 #include <process.h>
562 #else
563 #include <unistd.h>
564 #endif
set_pid(void)565 static void set_pid(void)
566 {
567 G.pid = getpid();
568 }
569
PathWithBasePath(char rslt[],size_t sz,char const path[],char const base[])570 static rc_t PathWithBasePath(char rslt[], size_t sz, char const path[], char const base[])
571 {
572 size_t const plen = strlen(path);
573 bool const hasBase = base && base[0];
574 bool const isBareName = strchr(path, '/') == NULL;
575
576 if (isBareName && hasBase) {
577 if (string_printf(rslt, sz, NULL, "%s/%s", base, path) == 0)
578 return 0;
579 }
580 else if (plen < sz) {
581 strncpy(rslt, path, sz);
582 return 0;
583 }
584 {
585 rc_t const rc = RC(rcApp, rcArgv, rcAccessing, rcBuffer, rcInsufficient);
586 (void)LOGERR(klogErr, rc, "The path to the file is too long");
587 return rc;
588 }
589 }
590
OpenFile(KFile const ** kf,char const path[],char const base[])591 static rc_t OpenFile(KFile const **kf, char const path[], char const base[])
592 {
593 char fname[4096];
594 rc_t rc = PathWithBasePath(fname, sizeof(fname), path, base);
595
596 if (rc == 0) {
597 KDirectory *dir;
598
599 rc = KDirectoryNativeDir(&dir);
600 if (rc == 0) {
601 rc = KDirectoryOpenFileRead(dir, kf, "%s", fname);
602 KDirectoryRelease(dir);
603 }
604 }
605 return rc;
606 }
607
LoadHeader(char const ** rslt,char const path[],char const base[])608 static rc_t LoadHeader(char const **rslt, char const path[], char const base[])
609 {
610 KFile const *kf;
611 rc_t rc = OpenFile(&kf, path, base);
612
613 *rslt = NULL;
614 if (rc == 0) {
615 uint64_t fsize;
616 rc = KFileSize(kf, &fsize);
617 if (rc == 0) {
618 char *fdata = malloc(fsize+1);
619
620 if (fdata) {
621 size_t nread;
622 rc = KFileRead(kf, 0, fdata, fsize, &nread);
623 if (rc == 0) {
624 if (nread) {
625 fdata[nread] = '\0';
626 *rslt = fdata;
627 }
628 else {
629 free(fdata);
630 rc = RC(rcApp, rcArgv, rcAccessing, rcFile, rcEmpty);
631 (void)PLOGERR(klogErr, (klogErr, rc, "File '$(file)' is empty", "file=%s", path));
632 }
633 }
634 else {
635 (void)PLOGERR(klogErr, (klogErr, rc, "Failed to read file '$(file)'", "file=%s", path));
636 }
637 }
638 else {
639 rc = RC(rcApp, rcArgv, rcAccessing, rcMemory, rcExhausted);
640 (void)PLOGERR(klogErr, (klogErr, rc, "Failed to read file '$(file)'", "file=%s", path));
641 }
642 }
643 KFileRelease(kf);
644 }
645 else {
646 (void)PLOGERR(klogErr, (klogErr, rc, "Failed to open file '$(file)'", "file=%s", path));
647 }
648 return rc;
649 }
650
main_help_vers(int argc,char * argv[])651 static rc_t main_help_vers(int argc, char * argv[])
652 {
653 Args *args = NULL;
654 rc_t const rc = ArgsMakeAndHandle (&args, argc, argv, 2, Options,
655 sizeof Options / sizeof (OptDef), XMLLogger_Args, XMLLogger_ArgsQty);
656 ArgsWhack(args);
657 return rc;
658 }
659
getArgValue(Args * const args,char const * name,int index,char const ** result)660 static rc_t getArgValue(Args *const args, char const *name, int index, char const **result)
661 {
662 void const *value;
663 rc_t const rc = ArgsOptionValue(args, name, index, &value);
664 if (rc) return rc;
665 free((void *)*result);
666 *result = strdup(value);
667 assert(*result);
668 return 0;
669 }
670
main_1(int argc,char * argv[],bool const continuing,unsigned const load)671 static rc_t main_1(int argc, char *argv[], bool const continuing, unsigned const load)
672 {
673 Args *args;
674 rc_t rc;
675 unsigned n_aligned = 0;
676 unsigned n_unalgnd = 0;
677 char *aligned[256];
678 char *unalgnd[256];
679 char *name_buffer = NULL;
680 unsigned next_name = 0;
681 unsigned nbsz = 0;
682 char const *value;
683 char *dummy;
684
685 rc = ArgsMakeAndHandle (&args, argc, argv, 1, Options, sizeof(Options)/sizeof(Options[0]));
686 while (rc == 0) {
687 uint32_t pcount;
688
689 rc = ArgsOptionCount(args, option_only_verify, &pcount);
690 if (rc)
691 break;
692 G.onlyVerifyReferences |= (pcount > 0);
693
694 rc = ArgsOptionCount(args, option_no_verify, &pcount);
695 if (rc)
696 break;
697 G.noVerifyReferences |= (pcount > 0);
698
699 rc = ArgsOptionCount(args, option_use_qual, &pcount);
700 if (rc)
701 break;
702 G.useQUAL |= (pcount > 0);
703
704 rc = ArgsOptionCount(args, option_ref_config, &pcount);
705 if (rc)
706 break;
707 G.limit2config |= (pcount > 0);
708
709 rc = ArgsOptionCount(args, OPTION_REF_FILE, &pcount);
710 if (rc)
711 break;
712 if (pcount && G.refFiles) {
713 int i;
714
715 for (i = 0; G.refFiles[i]; ++i)
716 free((void *)G.refFiles[i]);
717 free((void *)G.refFiles);
718 }
719 G.refFiles = calloc(pcount + 1, sizeof(*(G.refFiles)));
720 if (!G.refFiles) {
721 rc = RC(rcApp, rcArgv, rcAccessing, rcMemory, rcExhausted);
722 break;
723 }
724 while(pcount-- > 0) {
725 rc = getArgValue(args, OPTION_REF_FILE, pcount, &G.refFiles[pcount]);
726 if (rc)
727 break;
728 }
729
730 rc = ArgsOptionCount (args, OPTION_TMPFS, &pcount);
731 if (rc)
732 break;
733 if (pcount == 1)
734 {
735 rc = getArgValue(args, OPTION_TMPFS, 0, &G.tmpfs);
736 if (rc)
737 break;
738 }
739 else if (pcount > 1)
740 {
741 rc = RC(rcApp, rcArgv, rcAccessing, rcParam, rcExcessive);
742 OUTMSG (("Single parameter required\n"));
743 MiniUsage (args);
744 break;
745 }
746
747 rc = ArgsOptionCount (args, OPTION_INPUT, &pcount);
748 if (rc)
749 break;
750 if (pcount == 1)
751 {
752 rc = getArgValue(args, OPTION_INPUT, 0, &G.inpath);
753 if (rc)
754 break;
755 }
756 else if (pcount > 1)
757 {
758 rc = RC(rcApp, rcArgv, rcAccessing, rcParam, rcExcessive);
759 OUTMSG (("Single input parameter required\n"));
760 MiniUsage (args);
761 break;
762 }
763
764 rc = ArgsOptionCount (args, option_ref_filter, &pcount);
765 if (rc)
766 break;
767 if (pcount == 1)
768 {
769 rc = getArgValue(args, option_ref_filter, 0, &G.refFilter);
770 if (rc)
771 break;
772 }
773 else if (pcount > 1)
774 {
775 rc = RC(rcApp, rcArgv, rcAccessing, rcParam, rcExcessive);
776 OUTMSG (("Single parameter required\n"));
777 MiniUsage (args);
778 break;
779 }
780
781 rc = ArgsOptionCount (args, OPTION_CONFIG, &pcount);
782 if (rc)
783 break;
784 if (pcount == 1)
785 {
786 rc = getArgValue(args, OPTION_CONFIG, 0, &G.refXRefPath);
787 if (rc)
788 break;
789 }
790 else if (pcount > 1)
791 {
792 rc = RC(rcApp, rcArgv, rcAccessing, rcParam, rcExcessive);
793 OUTMSG (("Single input parameter required\n"));
794 MiniUsage (args);
795 break;
796 }
797
798 rc = ArgsOptionCount (args, OPTION_OUTPUT, &pcount);
799 if (rc)
800 break;
801 if (pcount == 1)
802 {
803 rc = getArgValue(args, OPTION_OUTPUT, 0, &G.outpath);
804 if (rc)
805 break;
806 if (load == 0) {
807 G.firstOut = strdup(G.outpath);
808 }
809 value = strrchr(G.outpath, '/');
810 G.outname = value ? (value + 1) : G.outpath;
811 }
812 else if (pcount > 1)
813 {
814 rc = RC(rcApp, rcArgv, rcAccessing, rcParam, rcExcessive);
815 OUTMSG (("Single output parameter required\n"));
816 MiniUsage (args);
817 break;
818 }
819 else if (!G.onlyVerifyReferences) {
820 rc = RC(rcApp, rcArgv, rcAccessing, rcParam, rcInsufficient);
821 OUTMSG (("Output parameter required\n"));
822 MiniUsage (args);
823 break;
824 }
825
826 rc = ArgsOptionCount (args, OPTION_MINMAPQ, &pcount);
827 if (rc)
828 break;
829 if (pcount == 1)
830 {
831 rc = ArgsOptionValue(args, OPTION_MINMAPQ, 0, (const void **)&value);
832 if (rc)
833 break;
834 G.minMapQual = strtoul(value, &dummy, 0);
835 }
836
837 rc = ArgsOptionCount (args, OPTION_QCOMP, &pcount);
838 if (rc)
839 break;
840 if (pcount == 1)
841 {
842 rc = getArgValue(args, OPTION_QCOMP, 0, &G.QualQuantizer);
843 if (rc)
844 break;
845 }
846
847 rc = ArgsOptionCount (args, option_edit_aligned_qual, &pcount);
848 if (rc)
849 break;
850 if (pcount == 1)
851 {
852 rc = ArgsOptionValue (args, option_edit_aligned_qual, 0, (const void **)&value);
853 if (rc)
854 break;
855 G.alignedQualValue = strtoul(value, &dummy, 0);
856 if (G.alignedQualValue == 0) {
857 rc = RC(rcApp, rcArgv, rcAccessing, rcParam, rcIncorrect);
858 OUTMSG (("edit-aligned-qual: bad value\n"));
859 MiniUsage (args);
860 break;
861 }
862 G.editAlignedQual = true;
863 }
864
865 rc = ArgsOptionCount (args, OPTION_CACHE_SIZE, &pcount);
866 if (rc)
867 break;
868 if (pcount == 1)
869 {
870 rc = ArgsOptionValue (args, OPTION_CACHE_SIZE, 0, (const void **)&value);
871 if (rc)
872 break;
873 G.cache_size = strtoul(value, &dummy, 0) * 1024UL * 1024UL;
874 if (G.cache_size == 0) {
875 rc = RC(rcApp, rcArgv, rcAccessing, rcParam, rcIncorrect);
876 OUTMSG (("cache-size: bad value\n"));
877 MiniUsage (args);
878 break;
879 }
880 }
881
882 rc = ArgsOptionCount (args, OPTION_MAX_WARN_DUP_FLAG, &pcount);
883 if (rc)
884 break;
885 if (pcount == 1)
886 {
887 rc = ArgsOptionValue (args, OPTION_MAX_WARN_DUP_FLAG, 0, (const void **)&value);
888 if (rc)
889 break;
890 G.maxWarnCount_DupConflict = strtoul(value, &dummy, 0);
891 }
892
893 rc = ArgsOptionCount (args, option_unsorted, &pcount);
894 if (rc)
895 break;
896 G.expectUnsorted |= (pcount > 0);
897
898 rc = ArgsOptionCount (args, option_sorted, &pcount);
899 if (rc)
900 break;
901 G.requireSorted |= (pcount > 0);
902
903 rc = ArgsOptionCount (args, OPTION_MAX_REC_COUNT, &pcount);
904 if (rc)
905 break;
906 if (pcount == 1)
907 {
908 rc = ArgsOptionValue (args, OPTION_MAX_REC_COUNT, 0, (const void **)&value);
909 if (rc)
910 break;
911 G.maxAlignCount = strtoul(value, &dummy, 0);
912 }
913
914 rc = ArgsOptionCount (args, OPTION_MAX_ERR_COUNT, &pcount);
915 if (rc)
916 break;
917 if (pcount == 1)
918 {
919 rc = ArgsOptionValue (args, OPTION_MAX_ERR_COUNT, 0, (const void **)&value);
920 if (rc)
921 break;
922 G.maxErrCount = strtoul(value, &dummy, 0);
923 }
924
925 rc = ArgsOptionCount (args, OPTION_MIN_MATCH, &pcount);
926 if (rc)
927 break;
928 if (pcount == 1)
929 {
930 rc = ArgsOptionValue (args, OPTION_MIN_MATCH, 0, (const void **)&value);
931 if (rc)
932 break;
933 G.minMatchCount = strtoul(value, &dummy, 0);
934 }
935
936 rc = ArgsOptionCount (args, OPTION_ACCEPT_DUP, &pcount);
937 if (rc)
938 break;
939 G.acceptBadDups |= (pcount > 0);
940
941 rc = ArgsOptionCount (args, OPTION_ACCEPT_NOMATCH, &pcount);
942 if (rc)
943 break;
944 G.acceptNoMatch |= (pcount > 0);
945
946 rc = ArgsOptionCount (args, option_keep_mismatch_qual, &pcount);
947 if (rc)
948 break;
949 G.keepMismatchQual |= (pcount > 0);
950
951 rc = ArgsOptionCount (args, OPTION_NO_CS, &pcount);
952 if (rc)
953 break;
954 G.noColorSpace |= (pcount > 0);
955
956 rc = ArgsOptionCount (args, OPTION_NO_SECONDARY, &pcount);
957 if (rc)
958 break;
959 G.noSecondary |= (pcount > 0);
960
961 rc = ArgsOptionCount (args, OPTION_TI, &pcount);
962 if (rc)
963 break;
964 G.hasTI |= (pcount > 0);
965
966 rc = ArgsOptionCount (args, OPTION_ACCEPT_HARD_CLIP, &pcount);
967 if (rc)
968 break;
969 G.acceptHardClip |= (pcount > 0);
970
971 rc = ArgsOptionCount (args, OPTION_ALLOW_MULTI_MAP, &pcount);
972 if (rc)
973 break;
974 G.allowMultiMapping |= (pcount > 0);
975
976 rc = ArgsOptionCount (args, OPTION_ALLOW_SECONDARY, &pcount);
977 if (rc)
978 break;
979 G.assembleWithSecondary |= (pcount > 0);
980
981 rc = ArgsOptionCount (args, OPTION_DEFER_SECONDARY, &pcount);
982 if (rc)
983 break;
984 G.deferSecondary |= (pcount > 0);
985
986 rc = ArgsOptionCount (args, OPTION_NOMATCH_LOG, &pcount);
987 if (rc)
988 break;
989 if (pcount == 1)
990 {
991 KDirectory *dir;
992
993 rc = ArgsOptionValue (args, OPTION_NOMATCH_LOG, 0, (const void **)&value);
994 if (rc) break;
995 rc = KDirectoryNativeDir(&dir);
996 if (rc) break;
997 rc = KDirectoryCreateFile(dir, &G.noMatchLog, 0, 0664, kcmInit, "%s", value);
998 KDirectoryRelease(dir);
999 if (rc) break;
1000 }
1001
1002 rc = ArgsOptionCount (args, OPTION_HEADER, &pcount);
1003 if (rc)
1004 break;
1005 if (pcount == 1) {
1006 rc = ArgsOptionValue (args, OPTION_HEADER, 0, (const void **)&value);
1007 if (rc) break;
1008 free((void *)G.headerText);
1009 rc = LoadHeader(&G.headerText, value, G.inpath);
1010 if (rc) break;
1011 }
1012
1013 rc = ArgsParamCount (args, &pcount);
1014 if (rc) break;
1015 if (pcount == 0)
1016 {
1017 rc = RC(rcApp, rcArgv, rcAccessing, rcParam, rcInsufficient);
1018 MiniUsage (args);
1019 break;
1020 }
1021 else if (pcount > sizeof(aligned)/sizeof(aligned[0])) {
1022 rc = RC(rcApp, rcArgv, rcAccessing, rcParam, rcExcessive);
1023 (void)PLOGERR(klogErr, (klogErr, rc, "$(count) input files is too many, $(max) is the limit",
1024 "count=%u,max=%u", (unsigned)pcount, (unsigned)(sizeof(aligned)/sizeof(aligned[0]))));
1025 break;
1026 }
1027 else {
1028 unsigned need = G.inpath ? (strlen(G.inpath) + 1) * pcount : 0;
1029 unsigned i;
1030
1031 for (i = 0; i < pcount; ++i) {
1032 rc = ArgsParamValue(args, i, (const void **)&value);
1033 if (rc) break;
1034 need += strlen(value) + 1;
1035 }
1036 nbsz = need;
1037 }
1038
1039 rc = ArgsOptionCount (args, OPTION_UNALIGNED, &pcount);
1040 if (rc)
1041 break;
1042 if (pcount > 0)
1043 {
1044 unsigned need = G.inpath ? (strlen(G.inpath) + 1) * pcount : 0;
1045 unsigned i;
1046
1047 for (i = 0; i < pcount; ++i) {
1048 rc = ArgsOptionValue(args, OPTION_UNALIGNED, i, (const void **)&value);
1049 if (rc) break;
1050 need += strlen(value) + 1;
1051 }
1052 if (rc) break;
1053 nbsz += need;
1054 }
1055
1056 name_buffer = malloc(nbsz);
1057 if (name_buffer == NULL) {
1058 rc = RC(rcApp, rcArgv, rcAccessing, rcMemory, rcExhausted);
1059 break;
1060 }
1061
1062 rc = ArgsOptionCount (args, OPTION_UNALIGNED, &pcount);
1063 if (rc == 0) {
1064 unsigned i;
1065
1066 for (i = 0; i < pcount; ++i) {
1067 rc = ArgsOptionValue(args, OPTION_UNALIGNED, i, (const void **)&value);
1068 if (rc) break;
1069
1070 unalgnd[n_unalgnd++] = name_buffer + next_name;
1071 rc = PathWithBasePath(name_buffer + next_name, nbsz - next_name, value, G.inpath);
1072 if (rc) break;
1073 next_name += strlen(name_buffer + next_name) + 1;
1074 }
1075 if (rc) break;
1076 }
1077 else
1078 break;
1079
1080 rc = ArgsParamCount (args, &pcount);
1081 if (rc == 0) {
1082 unsigned i;
1083
1084 for (i = 0; i < pcount; ++i) {
1085 rc = ArgsParamValue(args, i, (const void **)&value);
1086 if (rc) break;
1087
1088 aligned[n_aligned++] = name_buffer + next_name;
1089 rc = PathWithBasePath(name_buffer + next_name, nbsz - next_name, value, G.inpath);
1090 if (rc) break;
1091 next_name += strlen(name_buffer + next_name) + 1;
1092 }
1093 }
1094 else
1095 break;
1096
1097 rc = run(argv[0], n_aligned, (char const **)aligned, n_unalgnd, (char const **)unalgnd, continuing);
1098 break;
1099 }
1100 free(name_buffer);
1101
1102 if (rc) {
1103 (void)PLOGERR(klogErr, (klogErr, rc, "load failed",
1104 "severity=total,status=failure,accession=%s,errors=%u", G.outname, G.errCount));
1105 } else {
1106 (void)PLOGMSG(klogInfo, (klogInfo, "loaded",
1107 "severity=total,status=success,accession=%s,errors=%u", G.outname, G.errCount));
1108 }
1109 ArgsWhack(args);
1110 return rc;
1111 }
1112
cleanupGlobal(void)1113 static void cleanupGlobal(void)
1114 {
1115 if (G.refFiles) {
1116 int i;
1117
1118 for (i = 0; G.refFiles[i]; ++i)
1119 free((void *)G.refFiles[i]);
1120 free((void *)G.refFiles);
1121 }
1122 free((void *)G.tmpfs);
1123 free((void *)G.inpath);
1124 free((void *)G.refFilter);
1125 free((void *)G.refXRefPath);
1126 free((void *)G.outpath);
1127 free((void *)G.firstOut);
1128 free((void *)G.headerText);
1129 free((void *)G.QualQuantizer);
1130 free((void *)G.schemaPath);
1131 }
1132
/* Locate the first command line argument that exactly matches any string
 * in a NULL-terminated list.
 *
 * query  NULL-terminated array of strings to look for
 * first  index in argv at which the search starts
 * argc   number of entries in argv
 * argv   the argument vector
 *
 * Returns the index of the first matching argument, or 0 when no argument
 * in argv[first .. argc-1] matches.
 */
static int find_arg(char const *const *const query, int const first, int const argc, char **const argv)
{
    int pos = first;

    while (pos < argc) {
        char const *const *candidate;

        for (candidate = query; *candidate != NULL; ++candidate) {
            if (strcmp(argv[pos], *candidate) == 0)
                return pos;
        }
        ++pos;
    }
    return 0;
}
1147
/* Report whether any argument after the program name (argv[1] onward)
 * exactly matches one of the strings in the NULL-terminated list `query`.
 */
static bool has_arg(char const *const *const query, int const argc, char **const argv)
{
    int const where = find_arg(query, 1, argc, argv);

    return where > 0;
}
1152
1153 static const char *logger_options[] = { "--xml-log-fd", "--xml-log", "-z" };
make_logger(int * argc,char * argv[])1154 static XMLLogger const *make_logger(int *argc, char *argv[])
1155 {
1156 XMLLogger const *rslt = NULL;
1157 char *argf[4];
1158 int i;
1159
1160 argf[0] = argv[0];
1161 argf[1] = NULL;
1162 argf[2] = NULL;
1163 argf[3] = NULL;
1164
1165 for (i = 1; i < *argc; ++i) {
1166 int remove = 0;
1167
1168 if (strcmp(argv[i], logger_options[2]) == 0) {
1169 argf[1] = argv[i];
1170 argf[2] = argv[i + 1];
1171 remove = 2;
1172 }
1173 else {
1174 int j;
1175
1176 for (j = 0; j < 2; ++j) {
1177 if (strstr(argv[i], logger_options[j]) == argv[i]) {
1178 int const n = strlen(logger_options[j]);
1179
1180 if (argv[i][n] == '\0') {
1181 argf[1] = argv[i];
1182 argf[2] = argv[i + 1];
1183 remove = 2;
1184 }
1185 else if (argv[i][n] == '=') {
1186 argv[i][n] = '\0';
1187 argf[1] = argv[i];
1188 argf[2] = argv[i] + n + 1;
1189 remove = 1;
1190 }
1191 break;
1192 }
1193 }
1194 }
1195 if (argf[1] != NULL) {
1196 Args *args = NULL;
1197
1198 ArgsMakeAndHandle(&args, 3, argf, 1, XMLLogger_Args, XMLLogger_ArgsQty);
1199 if (args) {
1200 XMLLogger_Make(&rslt, NULL, args);
1201 ArgsWhack(args);
1202 }
1203 }
1204 if (remove) {
1205 *argc -= remove;
1206 memmove(argv + i, argv + i + remove, (*argc + 1) * sizeof(argv[0]));
1207 break;
1208 }
1209 }
1210 return rslt;
1211 }
1212
KMain(int argc,char * argv[])1213 rc_t CC KMain(int argc, char *argv[])
1214 {
1215 static const char *help[] = { "--help", "-h", "-?", NULL };
1216 static const char *vers[] = { "--version", "-V", NULL };
1217
1218 bool const has_help = has_arg(help, argc, argv);
1219 bool const has_vers = has_arg(vers, argc, argv);
1220 XMLLogger const *logger = NULL;
1221 int argfirst = 0;
1222 int arglast = 0;
1223 rc_t rc = 0;
1224 unsigned load = 0;
1225
1226 if (has_help) {
1227 argc = 2;
1228 argv[1] = "--help";
1229 return main_help_vers(argc, argv);
1230 }
1231 if (has_vers) {
1232 argc = 2;
1233 argv[1] = "--version";
1234 return main_help_vers(argc, argv);
1235 }
1236
1237 logger = make_logger(&argc, argv);
1238
1239 memset(&G, 0, sizeof(G));
1240 G.mode = mode_Archive;
1241 G.globalMode = mode_Archive;
1242 G.maxSeqLen = TableWriterRefSeq_MAX_SEQ_LEN;
1243 G.schemaPath = strdup(SCHEMAFILE);
1244 G.omit_aligned_reads = true;
1245 G.omit_reference_reads = true;
1246 G.minMapQual = 0; /* accept all */
1247 G.tmpfs = strdup("/tmp");
1248 G.cache_size = ((size_t)16) << 30;
1249 G.maxErrCount = 1000;
1250 G.minMatchCount = 10;
1251
1252 set_pid();
1253
1254 for (arglast = 1; arglast < argc; ++arglast) {
1255 if (strcmp(argv[arglast], "--remap") == 0) {
1256 argv[arglast] = argv[0];
1257 G.globalMode = mode_Remap;
1258 rc = main_1(arglast - argfirst, argv + argfirst, true, load);
1259 if (rc)
1260 break;
1261 G.mode = mode_Remap;
1262 argfirst = arglast;
1263 ++load;
1264 }
1265 }
1266 rc = main_1(arglast - argfirst, argv + argfirst, false, load);
1267 XMLLogger_Release(logger);
1268 cleanupGlobal();
1269 return rc;
1270 }
1271