1 #include "samtools.pysam.h"
2
3 /* bamtk.c -- main samtools command front-end.
4
5 Copyright (C) 2008-2021 Genome Research Ltd.
6
7 Author: Heng Li <lh3@sanger.ac.uk>
8
9 Permission is hereby granted, free of charge, to any person obtaining a copy
10 of this software and associated documentation files (the "Software"), to deal
11 in the Software without restriction, including without limitation the rights
12 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13 copies of the Software, and to permit persons to whom the Software is
14 furnished to do so, subject to the following conditions:
15
16 The above copyright notice and this permission notice shall be included in
17 all copies or substantial portions of the Software.
18
19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
25 DEALINGS IN THE SOFTWARE. */
26
27 #include <config.h>
28
29 #include <stdio.h>
30 #include <unistd.h>
31 #include <fcntl.h>
32 #include <string.h>
33
34 #include "htslib/hts.h"
35 #include "htslib/hfile.h"
36 #include "samtools.h"
37 #include "version.h"
38 #include "samtools_config_vars.h"
39
/* Entry points of the individual subcommands.  Each one is defined in its
   own source file and takes a main()-style argument vector.  They are
   grouped in the same categories as the help text printed by usage(). */

/* -- Indexing */
int dict_main(int argc, char *argv[]);
int faidx_main(int argc, char *argv[]);
int fqidx_main(int argc, char *argv[]);
int bam_index(int argc, char *argv[]);

/* -- Editing */
int bam_fillmd(int argc, char *argv[]);
int bam_mating(int argc, char *argv[]);
int samtools_main_reheader(int argc, char *argv[]);
int main_cut_target(int argc, char *argv[]);
int main_addreplacerg(int argc, char *argv[]);
int bam_markdup(int argc, char *argv[]);
int bam_rmdup(int argc, char *argv[]);
int amplicon_clip_main(int argc, char *argv[]);

/* -- File operations */
int main_bamshuf(int argc, char *argv[]);
int main_cat(int argc, char *argv[]);
int bam_merge(int argc, char *argv[]);
int bam_mpileup(int argc, char *argv[]);
int bam_sort(int argc, char *argv[]);
int main_split(int argc, char *argv[]);
int main_quickcheck(int argc, char *argv[]);
int main_bam2fq(int argc, char *argv[]);
int main_import(int argc, char *argv[]);

/* -- Statistics */
int main_bedcov(int argc, char *argv[]);
int main_coverage(int argc, char *argv[]);
int main_depth(int argc, char *argv[]);
int bam_flagstat(int argc, char *argv[]);
int bam_idxstats(int argc, char *argv[]);
int main_phase(int argc, char *argv[]);
int main_stats(int argc, char *argv[]);
int main_ampliconstats(int argc, char *argv[]);

/* -- Viewing */
int main_flags(int argc, char *argv[]);
int main_samview(int argc, char *argv[]);
int main_pad2unpad(int argc, char *argv[]);
int main_samples(int argc, char *argv[]);

/* Declared for completeness; samtools_main() in this file never calls them. */
int bam_taf2baf(int argc, char *argv[]);
int bam_tview_main(int argc, char *argv[]);
75
samtools_version()76 const char *samtools_version()
77 {
78 return SAMTOOLS_VERSION;
79 }
80
// Summarise the build-time features as a single space-separated string.
// The macros tested here come out of the config.h file built by autoconf
// or by the Makefile.
const char *samtools_feature_string(void)
{
    // Exactly one literal survives from each #ifdef pair; the compiler then
    // joins the adjacent literals into one string.
    static const char features[] =
#ifdef PACKAGE_URL
        "build=configure "
#else
        "build=Makefile "
#endif
#ifdef HAVE_CURSES
        "curses=yes "
#else
        "curses=no "
#endif
        ;

    return features;
}
100
long_version(void)101 static void long_version(void) {
102 fprintf(samtools_stdout, "samtools %s\n"
103 "Using htslib %s\n"
104 "Copyright (C) 2021 Genome Research Ltd.\n",
105 samtools_version(), hts_version());
106
107 fprintf(samtools_stdout, "\nSamtools compilation details:\n");
108 fprintf(samtools_stdout, " Features: %s\n", samtools_feature_string());
109 fprintf(samtools_stdout, " CC: %s\n", SAMTOOLS_CC);
110 fprintf(samtools_stdout, " CPPFLAGS: %s\n", SAMTOOLS_CPPFLAGS);
111 fprintf(samtools_stdout, " CFLAGS: %s\n", SAMTOOLS_CFLAGS);
112 fprintf(samtools_stdout, " LDFLAGS: %s\n", SAMTOOLS_LDFLAGS);
113 fprintf(samtools_stdout, " HTSDIR: %s\n", SAMTOOLS_HTSDIR);
114 fprintf(samtools_stdout, " LIBS: %s\n", SAMTOOLS_LIBS);
115 fprintf(samtools_stdout, " CURSES_LIB: %s\n", SAMTOOLS_CURSES_LIB);
116
117 fprintf(samtools_stdout, "\nHTSlib compilation details:\n");
118 fprintf(samtools_stdout, " Features: %s\n", hts_feature_string());
119 fprintf(samtools_stdout, " CC: %s\n", hts_test_feature(HTS_FEATURE_CC));
120 fprintf(samtools_stdout, " CPPFLAGS: %s\n", hts_test_feature(HTS_FEATURE_CPPFLAGS));
121 fprintf(samtools_stdout, " CFLAGS: %s\n", hts_test_feature(HTS_FEATURE_CFLAGS));
122 fprintf(samtools_stdout, " LDFLAGS: %s\n", hts_test_feature(HTS_FEATURE_LDFLAGS));
123
124 // Plugins and schemes
125 fprintf(samtools_stdout, "\nHTSlib URL scheme handlers present:\n");
126 const char *plugins[100];
127 int np = 100, i, j;
128
129 if (hfile_list_plugins(plugins, &np) < 0)
130 return;
131
132 for (i = 0; i < np; i++) {
133 const char *sc_list[100];
134 int nschemes = 100;
135 if (hfile_list_schemes(plugins[i], sc_list, &nschemes) < 0)
136 return;
137
138 fprintf(samtools_stdout, " %s:\t", plugins[i]);
139 for (j = 0; j < nschemes; j++)
140 fprintf(samtools_stdout, " %s%c", sc_list[j], ",\n"[j+1==nschemes]);
141 }
142 }
143
/* Write the top-level help to fp: a banner carrying the samtools and htslib
 * versions, followed by the list of available subcommands.
 */
static void usage(FILE *fp)
{
    /* Please improve the grouping */
    static const char command_summary[] =
        "Usage: samtools <command> [options]\n"
        "\n"
        "Commands:\n"
        " -- Indexing\n"
        " dict create a sequence dictionary file\n"
        " faidx index/extract FASTA\n"
        " fqidx index/extract FASTQ\n"
        " index index alignment\n"
        "\n"
        " -- Editing\n"
        " calmd recalculate MD/NM tags and '=' bases\n"
        " fixmate fix mate information\n"
        " reheader replace BAM header\n"
        " targetcut cut fosmid regions (for fosmid pool only)\n"
        " addreplacerg adds or replaces RG tags\n"
        " markdup mark duplicates\n"
        " ampliconclip clip oligos from the end of reads\n"
        "\n"
        " -- File operations\n"
        " collate shuffle and group alignments by name\n"
        " cat concatenate BAMs\n"
        " merge merge sorted alignments\n"
        " mpileup multi-way pileup\n"
        " sort sort alignment file\n"
        " split splits a file by read group\n"
        " quickcheck quickly check if SAM/BAM/CRAM file appears intact\n"
        " fastq converts a BAM to a FASTQ\n"
        " fasta converts a BAM to a FASTA\n"
        " import Converts FASTA or FASTQ files to SAM/BAM/CRAM\n"
        "\n"
        " -- Statistics\n"
        " bedcov read depth per BED region\n"
        " coverage alignment depth and percent coverage\n"
        " depth compute the depth\n"
        " flagstat simple stats\n"
        " idxstats BAM index stats\n"
        " phase phase heterozygotes\n"
        " stats generate stats (former bamcheck)\n"
        " ampliconstats generate amplicon specific stats\n"
        "\n"
        " -- Viewing\n"
        " flags explain BAM flags\n"
        " tview text alignment viewer\n"
        " view SAM<->BAM<->CRAM conversion\n"
        " depad convert padded BAM to unpadded BAM\n"
        " samples list the samples in a set of SAM/BAM/CRAM files\n"
        "\n"
        " -- Misc\n"
        " help [cmd] display this help message or help for [cmd]\n"
        " version detailed version information\n"
        "\n";

    /* Only the banner needs formatting; the command list is fixed text. */
    fprintf(fp,
            "\n"
            "Program: samtools (Tools for alignments in the SAM format)\n"
            "Version: %s (using htslib %s)\n\n",
            samtools_version(), hts_version());
    fputs(command_summary, fp);
}
205
// This is a tricky one: on Windows, filename wildcard expansion is done by
// the application and not by the shell, as traditionally Windows never had a
// "shell".  Even now, DOS and Powershell do not do this expansion (but bash
// does).
//
// This means that Mingw/Msys implements code that runs before main() and
// expands e.g. "*" into a list of matching filenames.  That in turn breaks
// things like specifying "*" as a region (all the unmapped reads).  We take a
// hard line here - filename expansion is the task of the shell, not our
// application!
//
// NOTE(review): defining _CRT_glob as 0 is presumably what switches that
// start-up expansion off -- confirm against the MinGW runtime in use.
#ifdef _WIN32
int _CRT_glob = 0;
#endif
217
samtools_main(int argc,char * argv[])218 int samtools_main(int argc, char *argv[])
219 {
220 #ifdef _WIN32
221 setmode(fileno(samtools_stdout), O_BINARY);
222 setmode(fileno(stdin), O_BINARY);
223 #endif
224 if (argc < 2) { usage(samtools_stderr); return 1; }
225
226 if (strcmp(argv[1], "help") == 0 || strcmp(argv[1], "--help") == 0) {
227 if (argc == 2) { usage(samtools_stdout); return 0; }
228
229 // Otherwise change "samtools help COMMAND [...]" to "samtools COMMAND";
230 // main_xyz() functions by convention display the subcommand's usage
231 // when invoked without any arguments.
232 argv++;
233 argc = 2;
234 }
235
236 int ret = 0;
237 if (strcmp(argv[1], "view") == 0) ret = main_samview(argc-1, argv+1);
238 else if (strcmp(argv[1], "import") == 0) ret = main_import(argc-1, argv+1);
239 else if (strcmp(argv[1], "mpileup") == 0) ret = bam_mpileup(argc-1, argv+1);
240 else if (strcmp(argv[1], "merge") == 0) ret = bam_merge(argc-1, argv+1);
241 else if (strcmp(argv[1], "sort") == 0) ret = bam_sort(argc-1, argv+1);
242 else if (strcmp(argv[1], "index") == 0) ret = bam_index(argc-1, argv+1);
243 else if (strcmp(argv[1], "idxstat") == 0 ||
244 strcmp(argv[1], "idxstats") == 0) ret = bam_idxstats(argc-1, argv+1);
245 else if (strcmp(argv[1], "faidx") == 0) ret = faidx_main(argc-1, argv+1);
246 else if (strcmp(argv[1], "fqidx") == 0) ret = fqidx_main(argc-1, argv+1);
247 else if (strcmp(argv[1], "dict") == 0) ret = dict_main(argc-1, argv+1);
248 else if (strcmp(argv[1], "fixmate") == 0) ret = bam_mating(argc-1, argv+1);
249 else if (strcmp(argv[1], "rmdup") == 0) ret = bam_rmdup(argc-1, argv+1);
250 else if (strcmp(argv[1], "markdup") == 0) ret = bam_markdup(argc-1, argv+1);
251 else if (strcmp(argv[1], "ampliconclip") == 0) ret = amplicon_clip_main(argc-1, argv+1);
252 else if (strcmp(argv[1], "flagstat") == 0 ||
253 strcmp(argv[1], "flagstats") == 0) ret = bam_flagstat(argc-1, argv+1);
254 else if (strcmp(argv[1], "calmd") == 0) ret = bam_fillmd(argc-1, argv+1);
255 else if (strcmp(argv[1], "fillmd") == 0) ret = bam_fillmd(argc-1, argv+1);
256 else if (strcmp(argv[1], "reheader") == 0) ret = samtools_main_reheader(argc-1, argv+1);
257 else if (strcmp(argv[1], "cat") == 0) ret = main_cat(argc-1, argv+1);
258 else if (strcmp(argv[1], "targetcut") == 0) ret = main_cut_target(argc-1, argv+1);
259 else if (strcmp(argv[1], "phase") == 0) ret = main_phase(argc-1, argv+1);
260 else if (strcmp(argv[1], "depth") == 0) ret = main_depth(argc-1, argv+1);
261 else if (strcmp(argv[1], "coverage") == 0) ret = main_coverage(argc-1, argv+1);
262 else if (strcmp(argv[1], "bam2fq") == 0 ||
263 strcmp(argv[1], "fastq") == 0 ||
264 strcmp(argv[1], "fasta") == 0) ret = main_bam2fq(argc-1, argv+1);
265 else if (strcmp(argv[1], "pad2unpad") == 0) ret = main_pad2unpad(argc-1, argv+1);
266 else if (strcmp(argv[1], "depad") == 0) ret = main_pad2unpad(argc-1, argv+1);
267 else if (strcmp(argv[1], "bedcov") == 0) ret = main_bedcov(argc-1, argv+1);
268 else if (strcmp(argv[1], "bamshuf") == 0) ret = main_bamshuf(argc-1, argv+1);
269 else if (strcmp(argv[1], "collate") == 0) ret = main_bamshuf(argc-1, argv+1);
270 else if (strcmp(argv[1], "stat") == 0 ||
271 strcmp(argv[1], "stats") == 0) ret = main_stats(argc-1, argv+1);
272 else if (strcmp(argv[1], "flag") == 0 ||
273 strcmp(argv[1], "flags") == 0) ret = main_flags(argc-1, argv+1);
274 else if (strcmp(argv[1], "split") == 0) ret = main_split(argc-1, argv+1);
275 else if (strcmp(argv[1], "quickcheck") == 0) ret = main_quickcheck(argc-1, argv+1);
276 else if (strcmp(argv[1], "addreplacerg") == 0) ret = main_addreplacerg(argc-1, argv+1);
277 else if (strcmp(argv[1], "pileup") == 0) {
278 fprintf(samtools_stderr, "[main] The `pileup' command has been removed. Please use `mpileup' instead.\n");
279 return 1;
280 }
281 //else if (strcmp(argv[1], "tview") == 0) ret = bam_tview_main(argc-1, argv+1);
282 else if (strcmp(argv[1], "ampliconstats") == 0) ret = main_ampliconstats(argc-1, argv+1);
283 else if (strcmp(argv[1], "samples") == 0) ret = main_samples(argc-1, argv+1);
284 else if (strcmp(argv[1], "version") == 0 || \
285 strcmp(argv[1], "--version") == 0) {
286 long_version();
287 }
288 else if (strcmp(argv[1], "--version-only") == 0) {
289 fprintf(samtools_stdout, "%s+htslib-%s\n", samtools_version(), hts_version());
290 }
291 else {
292 fprintf(samtools_stderr, "[main] unrecognized command '%s'\n", argv[1]);
293 return 1;
294 }
295 return ret;
296 }
297