1 /*  sam_view.c -- SAM<->BAM<->CRAM conversion.
2 
3     Copyright (C) 2009-2021 Genome Research Ltd.
4     Portions copyright (C) 2009, 2011, 2012 Broad Institute.
5 
6     Author: Heng Li <lh3@sanger.ac.uk>
7 
8 Permission is hereby granted, free of charge, to any person obtaining a copy
9 of this software and associated documentation files (the "Software"), to deal
10 in the Software without restriction, including without limitation the rights
11 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 copies of the Software, and to permit persons to whom the Software is
13 furnished to do so, subject to the following conditions:
14 
15 The above copyright notices and this permission notice shall be included in
16 all copies or substantial portions of the Software.
17 
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 DEALINGS IN THE SOFTWARE.  */
25 
26 #include <config.h>
27 
28 #include <stdlib.h>
29 #include <string.h>
30 #include <strings.h>
31 #include <stdio.h>
32 #include <unistd.h>
33 #include <math.h>
34 #include <inttypes.h>
35 #include <getopt.h>
36 #include "htslib/sam.h"
37 #include "htslib/faidx.h"
38 #include "htslib/khash.h"
39 #include "htslib/thread_pool.h"
40 #include "htslib/hts_expr.h"
41 #include "samtools.h"
42 #include "sam_opts.h"
43 #include "bam.h" // for bam_get_library and bam_remove_B
44 #include "bedidx.h"
45 
46 KHASH_SET_INIT_STR(str)
47 typedef khash_t(str) *strhash_t;
48 
49 KHASH_SET_INIT_INT(aux_exists)
50 typedef khash_t(aux_exists) *auxhash_t;
51 
52 // This structure contains the settings for a samview run
53 typedef struct samview_settings {
54     strhash_t rghash;
55     strhash_t rnhash;
56     strhash_t tvhash;
57     int min_mapQ;
58 
59     // Described here in the same terms as the usage statement.
60     // The code however always negates to "reject if"         keep if:
61     int flag_on;     // keep   if (FLAG & N) == N             (all on)
62     int flag_off;    // keep   if (FLAG & N) == 0             (all off)
63     int flag_anyon;  // keep   if (FLAG & N) != 0             (any on)
64     int flag_alloff; // reject if (FLAG & N) == N             (any off)
65 
66     int min_qlen;
67     int remove_B;
68     uint32_t subsam_seed;
69     double subsam_frac;
70     char* library;
71     void* bed;
72     size_t remove_aux_len;
73     char** remove_aux;
74     int multi_region;
75     char* tag;
76     hts_filter_t *filter;
77     int remove_flag;
78     int add_flag;
79     int unmap;
80     auxhash_t remove_tag;
81     auxhash_t keep_tag;
82 } samview_settings_t;
83 
84 // Copied from htslib/sam.c.
85 // TODO: we need a proper interface to find the length of an aux tag,
86 // or at the very make exportable versions of these in htslib.
aux_type2size(uint8_t type)87 static inline int aux_type2size(uint8_t type)
88 {
89     switch (type) {
90     case 'A': case 'c': case 'C':
91         return 1;
92     case 's': case 'S':
93         return 2;
94     case 'i': case 'I': case 'f':
95         return 4;
96     case 'd':
97         return 8;
98     case 'Z': case 'H': case 'B':
99         return type;
100     default:
101         return 0;
102     }
103 }
104 
105 // Copied from htslib/sam.c.
skip_aux(uint8_t * s,uint8_t * end)106 static inline uint8_t *skip_aux(uint8_t *s, uint8_t *end)
107 {
108     int size;
109     uint32_t n;
110     if (s >= end) return end;
111     size = aux_type2size(*s); ++s; // skip type
112     switch (size) {
113     case 'Z':
114     case 'H':
115         while (s < end && *s) ++s;
116         return s < end ? s + 1 : end;
117     case 'B':
118         if (end - s < 5) return NULL;
119         size = aux_type2size(*s); ++s;
120         n = le_to_u32(s);
121         s += 4;
122         if (size == 0 || end - s < size * n) return NULL;
123         return s + size * n;
124     case 0:
125         return NULL;
126     default:
127         if (end - s < size) return NULL;
128         return s + size;
129     }
130 }
131 
132 // Returns 0 to indicate read should be output 1 otherwise
process_aln(const sam_hdr_t * h,bam1_t * b,samview_settings_t * settings)133 static int process_aln(const sam_hdr_t *h, bam1_t *b, samview_settings_t* settings)
134 {
135     if (settings->filter && sam_passes_filter(h, b, settings->filter) < 1)
136         return 1;
137 
138     if (settings->remove_B) bam_remove_B(b);
139     if (settings->min_qlen > 0) {
140         int k, qlen = 0;
141         uint32_t *cigar = bam_get_cigar(b);
142         for (k = 0; k < b->core.n_cigar; ++k)
143             if ((bam_cigar_type(bam_cigar_op(cigar[k]))&1) || bam_cigar_op(cigar[k]) == BAM_CHARD_CLIP)
144                 qlen += bam_cigar_oplen(cigar[k]);
145         if (qlen < settings->min_qlen) return 1;
146     }
147     if (b->core.qual < settings->min_mapQ || ((b->core.flag & settings->flag_on) != settings->flag_on) || (b->core.flag & settings->flag_off))
148         return 1;
149     if (settings->flag_alloff && ((b->core.flag & settings->flag_alloff) == settings->flag_alloff))
150         return 1;
151     if (settings->flag_anyon && ((b->core.flag & settings->flag_anyon) == 0))
152         return 1;
153     if (!settings->multi_region && settings->bed && (b->core.tid < 0 || !bed_overlap(settings->bed, sam_hdr_tid2name(h, b->core.tid), b->core.pos, bam_endpos(b))))
154         return 1;
155     if (settings->subsam_frac > 0.) {
156         uint32_t k = __ac_Wang_hash(__ac_X31_hash_string(bam_get_qname(b)) ^ settings->subsam_seed);
157         if ((double)(k&0xffffff) / 0x1000000 >= settings->subsam_frac) return 1;
158     }
159     if (settings->rghash) {
160         uint8_t *s = bam_aux_get(b, "RG");
161         if (s) {
162             khint_t k = kh_get(str, settings->rghash, (char*)(s + 1));
163             if (k == kh_end(settings->rghash)) return 1;
164         }
165     }
166     if (settings->tag) {
167         uint8_t *s = bam_aux_get(b, settings->tag);
168         if (s) {
169             if (settings->tvhash) {
170                 char t[32], *val;
171                 if (*s == 'i' || *s == 'I' || *s == 's' || *s == 'S' || *s == 'c' || *s == 'C') {
172                     int ret = snprintf(t, 32, "%"PRId64, bam_aux2i(s));
173                     if (ret > 0) val = t;
174                     else return 1;
175                 } else if (*s == 'A') {
176                     t[0] = *(s+1);
177                     t[1] = 0;
178                     val = t;
179                 } else {
180                     val = (char *)(s+1);
181                 }
182                 khint_t k = kh_get(str, settings->tvhash, val);
183                 if (k == kh_end(settings->tvhash)) return 1;
184             }
185         } else {
186             return 1;
187         }
188     }
189     if (settings->rnhash) {
190         const char* rn = bam_get_qname(b);
191         if (!rn || kh_get(str, settings->rnhash, rn) == kh_end(settings->rnhash)) {
192             return 1;
193         }
194     }
195     if (settings->library) {
196         const char *p = bam_get_library((sam_hdr_t*)h, b);
197         if (!p || strcmp(p, settings->library) != 0) return 1;
198     }
199     if (settings->keep_tag) {
200         uint8_t *s_from, *s_to, *end = b->data + b->l_data;
201         auxhash_t h = settings->keep_tag;
202 
203         s_from = s_to = bam_get_aux(b);
204         while (s_from < end) {
205             int x = (int)s_from[0]<<8 | s_from[1];
206             uint8_t *s = skip_aux(s_from+2, end);
207             if (s == NULL) {
208                 print_error("view", "malformed aux data for record \"%s\"",
209                             bam_get_qname(b));
210                 break;
211             }
212 
213             if (kh_get(aux_exists, h, x) != kh_end(h) ) {
214                 if (s_to != s_from) memmove(s_to, s_from, s - s_from);
215                 s_to += s - s_from;
216             }
217             s_from = s;
218         }
219         b->l_data = s_to - b->data;
220 
221     } else if (settings->remove_tag) {
222         uint8_t *s_from, *s_to, *end = b->data + b->l_data;
223         auxhash_t h = settings->remove_tag;
224 
225         s_from = s_to = bam_get_aux(b);
226         while (s_from < end) {
227             int x = (int)s_from[0]<<8 | s_from[1];
228             uint8_t *s = skip_aux(s_from+2, end);
229             if (s == NULL) {
230                 print_error("view", "malformed aux data for record \"%s\"",
231                             bam_get_qname(b));
232                 break;
233             }
234 
235             if (kh_get(aux_exists, h, x) == kh_end(h) ) {
236                 if (s_to != s_from) memmove(s_to, s_from, s - s_from);
237                 s_to += s - s_from;
238             }
239             s_from = s;
240         }
241         b->l_data = s_to - b->data;
242     }
243 
244     return 0;
245 }
246 
247 static int usage(FILE *fp, int exit_status, int is_long_help);
248 
populate_lookup_from_file(const char * subcmd,strhash_t lookup,char * fn)249 static int populate_lookup_from_file(const char *subcmd, strhash_t lookup, char *fn)
250 {
251     FILE *fp;
252     char buf[1024];
253     int ret = 0;
254     fp = fopen(fn, "r");
255     if (fp == NULL) {
256         print_error_errno(subcmd, "failed to open \"%s\" for reading", fn);
257         return -1;
258     }
259 
260     while (ret != -1 && !feof(fp) && fscanf(fp, "%1023s", buf) > 0) {
261         char *d = strdup(buf);
262         if (d != NULL) {
263             kh_put(str, lookup, d, &ret);
264             if (ret == 0) free(d); /* Duplicate */
265         } else {
266             ret = -1;
267         }
268     }
269     if (ferror(fp)) ret = -1;
270     if (ret == -1) {
271         print_error_errno(subcmd, "failed to read \"%s\"", fn);
272     }
273     fclose(fp);
274     return (ret != -1) ? 0 : -1;
275 }
276 
add_read_group_single(const char * subcmd,samview_settings_t * settings,char * name)277 static int add_read_group_single(const char *subcmd, samview_settings_t *settings, char *name)
278 {
279     char *d = strdup(name);
280     int ret = 0;
281 
282     if (d == NULL) goto err;
283 
284     if (settings->rghash == NULL) {
285         settings->rghash = kh_init(str);
286         if (settings->rghash == NULL) goto err;
287     }
288 
289     kh_put(str, settings->rghash, d, &ret);
290     if (ret == -1) goto err;
291     if (ret ==  0) free(d); /* Duplicate */
292     return 0;
293 
294  err:
295     print_error(subcmd, "Couldn't add \"%s\" to read group list: memory exhausted?", name);
296     free(d);
297     return -1;
298 }
299 
add_read_names_file(const char * subcmd,samview_settings_t * settings,char * fn)300 static int add_read_names_file(const char *subcmd, samview_settings_t *settings, char *fn)
301 {
302     if (settings->rnhash == NULL) {
303         settings->rnhash = kh_init(str);
304         if (settings->rnhash == NULL) {
305             perror(NULL);
306             return -1;
307         }
308     }
309     return populate_lookup_from_file(subcmd, settings->rnhash, fn);
310 }
311 
add_read_groups_file(const char * subcmd,samview_settings_t * settings,char * fn)312 static int add_read_groups_file(const char *subcmd, samview_settings_t *settings, char *fn)
313 {
314     if (settings->rghash == NULL) {
315         settings->rghash = kh_init(str);
316         if (settings->rghash == NULL) {
317             perror(NULL);
318             return -1;
319         }
320     }
321     return populate_lookup_from_file(subcmd, settings->rghash, fn);
322 }
323 
add_tag_value_single(const char * subcmd,samview_settings_t * settings,char * name)324 static int add_tag_value_single(const char *subcmd, samview_settings_t *settings, char *name)
325 {
326     char *d = strdup(name);
327     int ret = 0;
328 
329     if (d == NULL) goto err;
330 
331     if (settings->tvhash == NULL) {
332         settings->tvhash = kh_init(str);
333         if (settings->tvhash == NULL) goto err;
334     }
335 
336     kh_put(str, settings->tvhash, d, &ret);
337     if (ret == -1) goto err;
338     if (ret ==  0) free(d); /* Duplicate */
339     return 0;
340 
341  err:
342     print_error(subcmd, "Couldn't add \"%s\" to tag values list: memory exhausted?", name);
343     free(d);
344     return -1;
345 }
346 
add_tag_values_file(const char * subcmd,samview_settings_t * settings,char * fn)347 static int add_tag_values_file(const char *subcmd, samview_settings_t *settings, char *fn)
348 {
349     if (settings->tvhash == NULL) {
350         settings->tvhash = kh_init(str);
351         if (settings->tvhash == NULL) {
352             perror(NULL);
353             return -1;
354         }
355     }
356     return populate_lookup_from_file(subcmd, settings->tvhash, fn);
357 }
358 
check_sam_write1(samFile * fp,const sam_hdr_t * h,const bam1_t * b,const char * fname,int * retp)359 static inline int check_sam_write1(samFile *fp, const sam_hdr_t *h, const bam1_t *b, const char *fname, int *retp)
360 {
361     int r = sam_write1(fp, h, b);
362     if (r >= 0) return r;
363 
364     if (fname) print_error_errno("view", "writing to \"%s\" failed", fname);
365     else print_error_errno("view", "writing to standard output failed");
366 
367     *retp = EXIT_FAILURE;
368     return r;
369 }
370 
change_flag(bam1_t * b,samview_settings_t * settings)371 static inline void change_flag(bam1_t *b, samview_settings_t *settings)
372 {
373     if (settings->add_flag)
374         b->core.flag |= settings->add_flag;
375 
376     if (settings->remove_flag)
377         b->core.flag &= ~settings->remove_flag;
378 }
379 
parse_aux_list(auxhash_t * h,char * optarg)380 int parse_aux_list(auxhash_t *h, char *optarg) {
381     if (!*h)
382         *h = kh_init(aux_exists);
383 
384     while (strlen(optarg) >= 2) {
385         int x = optarg[0]<<8 | optarg[1];
386         int ret = 0;
387         kh_put(aux_exists, *h, x, &ret);
388         if (ret < 0)
389             return -1;
390 
391         optarg += 2;
392         if (*optarg == ',') // allow white-space too for easy `cat file`?
393             optarg++;
394         else if (*optarg != 0)
395             break;
396     }
397 
398     if (strlen(optarg) != 0) {
399         fprintf(stderr, "main_samview: Error parsing option, "
400                 "auxiliary tags should be exactly two characters long.\n");
401         return -1;
402     }
403 
404     return 0;
405 }
406 
407 // Make mnemonic distinct values for longoption-only options
408 #define LONGOPT(c)  ((c) + 128)
409 
main_samview(int argc,char * argv[])410 int main_samview(int argc, char *argv[])
411 {
412     int c, is_header = 0, is_header_only = 0, ret = 0, compress_level = -1, is_count = 0, has_index_file = 0, no_pg = 0;
413     int64_t count = 0;
414     samFile *in = 0, *out = 0, *un_out=0;
415     FILE *fp_out = NULL;
416     sam_hdr_t *header = NULL;
417     char out_mode[6] = {0}, out_un_mode[6] = {0}, *out_format = "";
418     char *fn_in = 0, *fn_idx_in = 0, *fn_out = 0, *fn_fai = 0, *q, *fn_un_out = 0;
419     char *fn_out_idx = NULL, *fn_un_out_idx = NULL, *arg_list = NULL;
420     sam_global_args ga = SAM_GLOBAL_ARGS_INIT;
421     htsThreadPool p = {NULL, 0};
422     int filter_state = ALL, filter_op = 0;
423     int result;
424 
425     samview_settings_t settings = {
426         .rghash = NULL,
427         .tvhash = NULL,
428         .min_mapQ = 0,
429         .flag_on = 0,
430         .flag_off = 0,
431         .flag_alloff = 0,
432         .flag_anyon = 0,
433         .min_qlen = 0,
434         .remove_B = 0,
435         .subsam_seed = 0,
436         .subsam_frac = -1.,
437         .library = NULL,
438         .bed = NULL,
439         .multi_region = 0,
440         .tag = NULL,
441         .filter = NULL,
442         .remove_flag = 0,
443         .add_flag = 0,
444         .keep_tag = NULL,
445         .remove_tag = NULL,
446         .unmap = 0,
447     };
448 
449     static const struct option lopts[] = {
450         SAM_OPT_GLOBAL_OPTIONS('-', 0, 'O', 0, 'T', '@'),
451         {"add-flags", required_argument, NULL, LONGOPT('a')},
452         {"bam", no_argument, NULL, 'b'},
453         {"count", no_argument, NULL, 'c'},
454         {"cram", no_argument, NULL, 'C'},
455         {"customised-index", no_argument, NULL, 'X'},
456         {"customized-index", no_argument, NULL, 'X'},
457         {"excl-flags", required_argument, NULL, 'F'},
458         {"exclude-flags", required_argument, NULL, 'F'},
459         {"expr", required_argument, NULL, 'e'},
460         {"expression", required_argument, NULL, 'e'},
461         {"fai-reference", required_argument, NULL, 't'},
462         {"fast", no_argument, NULL, '1'},
463         {"header-only", no_argument, NULL, 'H'},
464         {"help", no_argument, NULL, LONGOPT('?')},
465         {"incl-flags", required_argument, NULL, LONGOPT('g')},
466         {"include-flags", required_argument, NULL, LONGOPT('g')},
467         {"rf", required_argument, NULL, LONGOPT('g')}, // aka incl-flags
468         {"keep-tag", required_argument, NULL, LONGOPT('x') },
469         {"library", required_argument, NULL, 'l'},
470         {"min-mapq", required_argument, NULL, 'q'},
471         {"min-MQ", required_argument, NULL, 'q'},
472         {"min-mq", required_argument, NULL, 'q'},
473         {"min-qlen", required_argument, NULL, 'm'},
474         {"no-header", no_argument, NULL, LONGOPT('H')},
475         {"no-PG", no_argument, NULL, LONGOPT('P')},
476         {"output", required_argument, NULL, 'o'},
477         {"output-unselected", required_argument, NULL, 'U'},
478         {"QNAME-file", required_argument, NULL, 'N'},
479         {"qname-file", required_argument, NULL, 'N'},
480         {"read-group", required_argument, NULL, 'r'},
481         {"read-group-file", required_argument, NULL, 'R'},
482         {"readgroup", required_argument, NULL, 'r'},
483         {"readgroup-file", required_argument, NULL, 'R'},
484         {"region-file", required_argument, NULL, LONGOPT('L')},
485         {"regions-file", required_argument, NULL, LONGOPT('L')},
486         {"remove-B", no_argument, NULL, 'B'},
487         {"remove-flags", required_argument, NULL, LONGOPT('r')},
488         {"remove-tag", required_argument, NULL, 'x'},
489         {"require-flags", required_argument, NULL, 'f'},
490         {"subsample", required_argument, NULL, LONGOPT('s')},
491         {"subsample-seed", required_argument, NULL, LONGOPT('S')},
492         {"tag", required_argument, NULL, 'd'},
493         {"tag-file", required_argument, NULL, 'D'},
494         {"target-file", required_argument, NULL, 'L'},
495         {"targets-file", required_argument, NULL, 'L'},
496         {"uncompressed", no_argument, NULL, 'u'},
497         {"unmap", no_argument, NULL, 'p'},
498         {"unoutput", required_argument, NULL, 'U'},
499         {"use-index", no_argument, NULL, 'M'},
500         {"with-header", no_argument, NULL, 'h'},
501     };
502 
503     /* parse command-line options */
504     strcpy(out_mode, "w");
505     strcpy(out_un_mode, "w");
506     if (argc == 1 && isatty(STDIN_FILENO))
507         return usage(stdout, EXIT_SUCCESS, 0);
508 
509     // Suppress complaints about '?' being an unrecognised option.  Without
510     // this we have to put '?' in the options list, which makes it hard to
511     // tell a bad long option from the use of '-?' (both return '?' and
512     // set optopt to '\0').
513     opterr = 0;
514 
515     while ((c = getopt_long(argc, argv,
516                             "SbBcCt:h1Ho:O:q:f:F:G:ul:r:T:R:N:d:D:L:s:@:m:x:U:MXe:p",
517                             lopts, NULL)) >= 0) {
518         switch (c) {
519         case 's':
520             settings.subsam_seed = strtol(optarg, &q, 10);
521             if (q && *q == '.') {
522                 settings.subsam_frac = strtod(q, &q);
523                 if (*q) ret = 1;
524             } else {
525                 ret = 1;
526             }
527 
528             if (ret == 1) {
529                 print_error("view", "Incorrect sampling argument \"%s\"", optarg);
530                 goto view_end;
531             }
532             break;
533         case LONGOPT('s'):
534             settings.subsam_frac = strtod(optarg, &q);
535             if (*q || settings.subsam_frac < 0.0 || settings.subsam_frac > 1.0) {
536                 print_error("view", "Incorrect sampling argument \"%s\"", optarg);
537                 goto view_end;
538             }
539             break;
540         case LONGOPT('S'): settings.subsam_seed = atoi(optarg); break;
541         case 'm': settings.min_qlen = atoi(optarg); break;
542         case 'c': is_count = 1; break;
543         case 'S': break;
544         case 'b': out_format = "b"; break;
545         case 'C': out_format = "c"; break;
546         case 't': fn_fai = strdup(optarg); break;
547         case 'h': is_header = 1; break;
548         case 'H': is_header_only = 1; break;
549         case LONGOPT('H'): is_header = is_header_only = 0; break;
550         case 'o': fn_out = strdup(optarg); break;
551         case 'U': fn_un_out = strdup(optarg); break;
552         case 'X': has_index_file = 1; break;
553         case 'f': settings.flag_on |= bam_str2flag(optarg); break;
554         case 'F': settings.flag_off |= bam_str2flag(optarg); break;
555         case LONGOPT('g'):
556             settings.flag_anyon |= bam_str2flag(optarg); break;
557         case 'G': settings.flag_alloff |= bam_str2flag(optarg); break;
558         case 'q': settings.min_mapQ = atoi(optarg); break;
559         case 'u': compress_level = 0; break;
560         case '1': compress_level = 1; break;
561         case 'l': settings.library = strdup(optarg); break;
562         case 'p': settings.unmap = 1; break;
563         case LONGOPT('L'):
564             settings.multi_region = 1;
565             // fall through
566         case 'L':
567             if ((settings.bed = bed_read(optarg)) == NULL) {
568                 print_error_errno("view", "Could not read file \"%s\"", optarg);
569                 ret = 1;
570                 goto view_end;
571             }
572             break;
573         case 'r':
574             if (add_read_group_single("view", &settings, optarg) != 0) {
575                 ret = 1;
576                 goto view_end;
577             }
578             break;
579         case 'R':
580             if (add_read_groups_file("view", &settings, optarg) != 0) {
581                 ret = 1;
582                 goto view_end;
583             }
584             break;
585         case 'N':
586             if (add_read_names_file("view", &settings, optarg) != 0) {
587                 ret = 1;
588                 goto view_end;
589             }
590             break;
591         case 'd':
592             if (strlen(optarg) < 2 || (strlen(optarg) > 2 && optarg[2] != ':')) {
593                 print_error_errno("view", "Invalid \"tag:value\" option: \"%s\"", optarg);
594                 ret = 1;
595                 goto view_end;
596             }
597 
598             if (settings.tag) {
599                 if (settings.tag[0] != optarg[0] || settings.tag[1] != optarg[1]) {
600                     print_error("view", "Different tag \"%s\" was specified before: \"%s\"", settings.tag, optarg);
601                     ret = 1;
602                     goto view_end;
603                 }
604             } else {
605                 if (!(settings.tag = calloc(3, 1))) {
606                     print_error("view", "Could not allocate memory for tag: \"%s\"", optarg);
607                     ret = 1;
608                     goto view_end;
609                 }
610                 memcpy(settings.tag, optarg, 2);
611             }
612 
613             if (strlen(optarg) > 3 && add_tag_value_single("view", &settings, optarg+3) != 0) {
614                 print_error("view", "Could not add tag:value \"%s\"", optarg);
615                 ret = 1;
616                 goto view_end;
617             }
618             break;
619         case 'D':
620             // Allow ";" as delimiter besides ":" to support MinGW CLI POSIX
621             // path translation as described at:
622             // http://www.mingw.org/wiki/Posix_path_conversion
623             if (strlen(optarg) < 4 || (optarg[2] != ':' && optarg[2] != ';')) {
624                 print_error_errno("view", "Invalid \"tag:file\" option: \"%s\"", optarg);
625                 ret = 1;
626                 goto view_end;
627             }
628 
629             if (settings.tag) {
630                 if (settings.tag[0] != optarg[0] || settings.tag[1] != optarg[1]) {
631                     print_error("view", "Different tag \"%s\" was specified before: \"%s\"", settings.tag, optarg);
632                     ret = 1;
633                     goto view_end;
634                 }
635             } else {
636                 if (!(settings.tag = calloc(3, 1))) {
637                     print_error("view", "Could not allocate memory for tag: \"%s\"", optarg);
638                     ret = 1;
639                     goto view_end;
640                 }
641                 memcpy(settings.tag, optarg, 2);
642             }
643 
644             if (add_tag_values_file("view", &settings, optarg+3) != 0) {
645                 ret = 1;
646                 goto view_end;
647             }
648             break;
649                 /* REMOVED as htslib doesn't support this
650         //case 'x': out_format = "x"; break;
651         //case 'X': out_format = "X"; break;
652                  */
653         case LONGOPT('?'):
654             return usage(stdout, EXIT_SUCCESS, 1);
655         case '?':
656             if (optopt == '?') {  // '-?' appeared on command line
657                 return usage(stdout, EXIT_SUCCESS, 1);
658             } else {
659                 if (optopt) { // Bad short option
660                     print_error("view", "invalid option -- '%c'", optopt);
661                 } else { // Bad long option
662                     // Do our best.  There is no good solution to finding
663                     // out what the bad option was.
664                     // See, e.g. https://stackoverflow.com/questions/2723888/where-does-getopt-long-store-an-unrecognized-option
665                     if (optind > 0 && strncmp(argv[optind - 1], "--", 2) == 0) {
666                         print_error("view", "unrecognised option '%s'",
667                                     argv[optind - 1]);
668                     }
669                 }
670                 return usage(stderr, EXIT_FAILURE, 0);
671             }
672         case 'B': settings.remove_B = 1; break;
673 
674         case 'M': settings.multi_region = 1; break;
675         case LONGOPT('P'): no_pg = 1; break;
676         case 'e':
677             if (!(settings.filter = hts_filter_init(optarg))) {
678                 print_error("main_samview", "Couldn't initialise filter");
679                 return 1;
680             }
681             break;
682         case LONGOPT('r'): settings.remove_flag |= bam_str2flag(optarg); break;
683         case LONGOPT('a'): settings.add_flag |= bam_str2flag(optarg); break;
684 
685         case 'x':
686             if (*optarg == '^') {
687                 if (parse_aux_list(&settings.keep_tag, optarg+1))
688                     return usage(stderr, EXIT_FAILURE, 0);
689             } else {
690                 if (parse_aux_list(&settings.remove_tag, optarg))
691                     return usage(stderr, EXIT_FAILURE, 0);
692             }
693             break;
694 
695         case LONGOPT('x'):
696             if (parse_aux_list(&settings.keep_tag, optarg))
697                 return usage(stderr, EXIT_FAILURE, 0);
698             break;
699 
700         default:
701             if (parse_sam_global_opt(c, optarg, lopts, &ga) != 0)
702                 return usage(stderr, EXIT_FAILURE, 0);
703             break;
704         }
705     }
706     if (fn_fai == 0 && ga.reference) fn_fai = fai_path(ga.reference);
707     if (compress_level >= 0 && !*out_format) out_format = "b";
708     if (is_header_only) is_header = 1;
709     // File format auto-detection first
710     if (fn_out)    sam_open_mode(out_mode+1,    fn_out,    NULL);
711     if (fn_un_out) sam_open_mode(out_un_mode+1, fn_un_out, NULL);
712     // Overridden by manual -b, -C
713     if (*out_format)
714         out_mode[1] = out_un_mode[1] = *out_format;
715     // out_(un_)mode now 1, 2 or 3 bytes long, followed by nul.
716     if (compress_level >= 0) {
717         char tmp[2];
718         tmp[0] = compress_level + '0'; tmp[1] = '\0';
719         strcat(out_mode, tmp);
720         strcat(out_un_mode, tmp);
721     }
722     if (argc == optind && isatty(STDIN_FILENO)) {
723         print_error("view", "No input provided or missing option argument.");
724         return usage(stderr, EXIT_FAILURE, 0); // potential memory leak...
725     }
726 
727     if (settings.unmap && fn_un_out) {
728         print_error("view", "Options --unoutput and --unmap are mutually exclusive.");
729         ret = 1;
730         goto view_end;
731     }
732 
733     if (settings.subsam_seed != 0) {
734         // Convert likely user input 1,2,... to pseudo-random
735         // values with more entropy and more bits set
736         srand(settings.subsam_seed);
737         settings.subsam_seed = rand();
738     }
739 
740     fn_in = (optind < argc)? argv[optind] : "-";
741     if ((in = sam_open_format(fn_in, "r", &ga.in)) == 0) {
742         print_error_errno("view", "failed to open \"%s\" for reading", fn_in);
743         ret = 1;
744         goto view_end;
745     }
746 
747     if (fn_fai) {
748         if (hts_set_fai_filename(in, fn_fai) != 0) {
749             fprintf(stderr, "[main_samview] failed to use reference \"%s\".\n", fn_fai);
750             ret = 1;
751             goto view_end;
752         }
753     }
754     if ((header = sam_hdr_read(in)) == 0) {
755         fprintf(stderr, "[main_samview] fail to read the header from \"%s\".\n", fn_in);
756         ret = 1;
757         goto view_end;
758     }
759     if (settings.rghash) {
760         sam_hdr_remove_lines(header, "RG", "ID", settings.rghash);
761     }
762     if (!is_count) {
763         if ((out = sam_open_format(fn_out? fn_out : "-", out_mode, &ga.out)) == 0) {
764             print_error_errno("view", "failed to open \"%s\" for writing", fn_out? fn_out : "standard output");
765             ret = 1;
766             goto view_end;
767         }
768         if (fn_fai) {
769             if (hts_set_fai_filename(out, fn_fai) != 0) {
770                 fprintf(stderr, "[main_samview] failed to use reference \"%s\".\n", fn_fai);
771                 ret = 1;
772                 goto view_end;
773             }
774         }
775         autoflush_if_stdout(out, fn_out);
776 
777         if (!no_pg) {
778             if (!(arg_list = stringify_argv(argc+1, argv-1))) {
779                 print_error("view", "failed to create arg_list");
780                 ret = 1;
781                 goto view_end;
782             }
783             if (sam_hdr_add_pg(header, "samtools",
784                                          "VN", samtools_version(),
785                                          arg_list ? "CL": NULL,
786                                          arg_list ? arg_list : NULL,
787                                          NULL)) {
788                 print_error("view", "failed to add PG line to the header");
789                 ret = 1;
790                 goto view_end;
791             }
792         }
793 
794         if (*out_format || ga.write_index || is_header ||
795             out_mode[1] == 'b' || out_mode[1] == 'c' ||
796             (ga.out.format != sam && ga.out.format != unknown_format))  {
797             if (sam_hdr_write(out, header) != 0) {
798                 fprintf(stderr, "[main_samview] failed to write the SAM header\n");
799                 ret = 1;
800                 goto view_end;
801             }
802         }
803         if (ga.write_index) {
804             if (!(fn_out_idx = auto_index(out, fn_out, header))) {
805                 ret = 1;
806                 goto view_end;
807             }
808         }
809 
810         if (fn_un_out) {
811             if ((un_out = sam_open_format(fn_un_out, out_un_mode, &ga.out)) == 0) {
812                 print_error_errno("view", "failed to open \"%s\" for writing", fn_un_out);
813                 ret = 1;
814                 goto view_end;
815             }
816             if (fn_fai) {
817                 if (hts_set_fai_filename(un_out, fn_fai) != 0) {
818                     fprintf(stderr, "[main_samview] failed to use reference \"%s\".\n", fn_fai);
819                     ret = 1;
820                     goto view_end;
821                 }
822             }
823             autoflush_if_stdout(un_out, fn_un_out);
824             if (*out_format || is_header ||
825                 out_un_mode[1] == 'b' || out_un_mode[1] == 'c' ||
826                 (ga.out.format != sam && ga.out.format != unknown_format))  {
827                 if (sam_hdr_write(un_out, header) != 0) {
828                     fprintf(stderr, "[main_samview] failed to write the SAM header\n");
829                     ret = 1;
830                     goto view_end;
831                 }
832             }
833             if (ga.write_index) {
834                 if (!(fn_un_out_idx = auto_index(un_out, fn_un_out, header))) {
835                     ret = 1;
836                     goto view_end;
837                 }
838             }
839         }
840     }
841     else {
842         if (fn_out) {
843             fp_out = fopen(fn_out, "w");
844             if (fp_out == NULL) {
845                 print_error_errno("view", "can't create \"%s\"", fn_out);
846                 ret = EXIT_FAILURE;
847                 goto view_end;
848             }
849         }
850     }
851 
852     if (ga.nthreads > 1) {
853         if (!(p.pool = hts_tpool_init(ga.nthreads))) {
854             fprintf(stderr, "Error creating thread pool\n");
855             ret = 1;
856             goto view_end;
857         }
858         hts_set_opt(in,  HTS_OPT_THREAD_POOL, &p);
859         if (out) hts_set_opt(out, HTS_OPT_THREAD_POOL, &p);
860     }
861     if (is_header_only) goto view_end; // no need to print alignments
862 
863     if (has_index_file) {
864         fn_idx_in = (optind+1 < argc)? argv[optind+1] : 0;
865         if (fn_idx_in == 0) {
866             fprintf(stderr, "[main_samview] incorrect number of arguments for -X option. Aborting.\n");
867             return 1;
868         }
869     }
870 
871     if (settings.multi_region) {
872         if (!has_index_file && optind < argc - 1) { //regions have been specified in the command line
873             settings.bed = bed_hash_regions(settings.bed, argv, optind+1, argc, &filter_op); //insert(1) or filter out(0) the regions from the command line in the same hash table as the bed file
874             if (!filter_op)
875                 filter_state = FILTERED;
876         } else if (has_index_file && optind < argc - 2) {
877             settings.bed = bed_hash_regions(settings.bed, argv, optind+2, argc, &filter_op); //insert(1) or filter out(0) the regions from the command line in the same hash table as the bed file
878             if (!filter_op)
879                 filter_state = FILTERED;
880         } else {
881             bed_unify(settings.bed);
882         }
883 
884         bam1_t *b = bam_init1();
885         if (settings.bed == NULL) { // index is unavailable or no regions have been specified
886             fprintf(stderr, "[main_samview] no regions or BED file have been provided. Aborting.\n");
887         } else {
888             hts_idx_t *idx = NULL;
889             // If index filename has not been specfied, look in BAM folder
890             if (fn_idx_in != 0) {
891                 idx = sam_index_load2(in, fn_in, fn_idx_in); // load index
892             } else {
893                 idx = sam_index_load(in, fn_in);
894             }
895             if (idx != NULL) {
896 
897                 int regcount = 0;
898 
899                 hts_reglist_t *reglist = bed_reglist(settings.bed, filter_state, &regcount);
900                 if(reglist) {
901                     hts_itr_multi_t *iter = sam_itr_regions(idx, header, reglist, regcount);
902                     if (iter) {
903                         // fetch alignments
904                         while ((result = sam_itr_multi_next(in, iter, b)) >= 0) {
905                             if (!process_aln(header, b, &settings)) {
906                                 if (!is_count) {
907                                     change_flag(b, &settings);
908                                     if (check_sam_write1(out, header, b, fn_out, &ret) < 0) break;
909                                 }
910                                 count++;
911                             } else if (settings.unmap) {
912                                 b->core.flag |= BAM_FUNMAP;
913                                 if (check_sam_write1(out, header, b, fn_out, &ret) < 0) break;
914                             } else {
915                                 if (un_out) { if (check_sam_write1(un_out, header, b, fn_un_out, &ret) < 0) break; }
916                             }
917                         }
918                         if (result < -1) {
919                             print_error("view", "retrieval of region %d failed due to truncated file or corrupt BAM index file", iter->curr_tid);
920                             ret = 1;
921                         }
922 
923                         hts_itr_multi_destroy(iter);
924                     } else {
925                         fprintf(stderr, "[main_samview] iterator could not be created. Aborting.\n");
926                     }
927                 } else {
928                     fprintf(stderr, "[main_samview] region list is empty or could not be created. Aborting.\n");
929                 }
930                 hts_idx_destroy(idx); // destroy the BAM index
931             } else {
932                 fprintf(stderr, "[main_samview] random alignment retrieval only works for indexed BAM or CRAM files.\n");
933             }
934         }
935         bam_destroy1(b);
936     } else {
937         if ((has_index_file && optind >= argc - 2) || (!has_index_file && optind >= argc - 1)) { // convert/print the entire file
938             bam1_t *b = bam_init1();
939             int r;
940             errno = 0;
941             while ((r = sam_read1(in, header, b)) >= 0) { // read one alignment from `in'
942                 if (!process_aln(header, b, &settings)) {
943                     if (!is_count) {
944                         change_flag(b, &settings);
945                         if (check_sam_write1(out, header, b, fn_out, &ret) < 0) break;
946                     }
947                     count++;
948                 } else if (settings.unmap) {
949                     b->core.flag |= BAM_FUNMAP;
950                     if (check_sam_write1(out, header, b, fn_out, &ret) < 0) break;
951                 } else {
952                     if (un_out) { if (check_sam_write1(un_out, header, b, fn_un_out, &ret) < 0) break; }
953                 }
954             }
955             if (r < -1) {
956                 print_error_errno("view", "error reading file \"%s\"", fn_in);
957                 ret = 1;
958             }
959             bam_destroy1(b);
960         } else { // retrieve alignments in specified regions
961             int i;
962             bam1_t *b;
963             hts_idx_t *idx = NULL;
964             // If index filename has not been specfied, look in BAM folder
965             if (fn_idx_in != NULL) {
966                 idx = sam_index_load2(in, fn_in, fn_idx_in); // load index
967             } else {
968                 idx = sam_index_load(in, fn_in);
969             }
970             if (idx == 0) { // index is unavailable
971                 fprintf(stderr, "[main_samview] random alignment retrieval only works for indexed BAM or CRAM files.\n");
972                 ret = 1;
973                 goto view_end;
974             }
975             b = bam_init1();
976 
977             for (i = (has_index_file)? optind+2 : optind+1; i < argc; ++i) {
978                 int result;
979                 hts_itr_t *iter = sam_itr_querys(idx, header, argv[i]); // parse a region in the format like `chr2:100-200'
980                 if (iter == NULL) { // region invalid or reference name not found
981                     fprintf(stderr, "[main_samview] region \"%s\" specifies an invalid region or unknown reference. Continue anyway.\n", argv[i]);
982                     continue;
983                 }
984                 // fetch alignments
985                 while ((result = sam_itr_next(in, iter, b)) >= 0) {
986                     if (!process_aln(header, b, &settings)) {
987                         if (!is_count) {
988                             change_flag(b, &settings);
989                             if (check_sam_write1(out, header, b, fn_out, &ret) < 0) break;
990                         }
991                         count++;
992                     } else if (settings.unmap) {
993                         b->core.flag |= BAM_FUNMAP;
994                         if (check_sam_write1(out, header, b, fn_out, &ret) < 0) break;
995                     } else {
996                         if (un_out) { if (check_sam_write1(un_out, header, b, fn_un_out, &ret) < 0) break; }
997                     }
998                 }
999                 hts_itr_destroy(iter);
1000                 if (result < -1) {
1001                     print_error("view", "retrieval of region \"%s\" failed due to truncated file or corrupt BAM index file", argv[i]);
1002                     ret = 1;
1003                     break;
1004                 }
1005             }
1006             bam_destroy1(b);
1007             hts_idx_destroy(idx); // destroy the BAM index
1008         }
1009     }
1010 
1011     if (ga.write_index) {
1012         if (sam_idx_save(out) < 0) {
1013             print_error_errno("view", "writing index failed");
1014             ret = 1;
1015         }
1016         if (un_out && sam_idx_save(un_out) < 0) {
1017             print_error_errno("view", "writing index failed");
1018             ret = 1;
1019         }
1020     }
1021 
1022 view_end:
1023     if (is_count && ret == 0) {
1024         if (fprintf(fn_out? fp_out : stdout, "%" PRId64 "\n", count) < 0) {
1025             if (fn_out) print_error_errno("view", "writing to \"%s\" failed", fn_out);
1026             else print_error_errno("view", "writing to standard output failed");
1027             ret = EXIT_FAILURE;
1028         }
1029     }
1030 
1031     // close files, free and return
1032     if (in) check_sam_close("view", in, fn_in, "standard input", &ret);
1033     if (out) check_sam_close("view", out, fn_out, "standard output", &ret);
1034     if (un_out) check_sam_close("view", un_out, fn_un_out, "file", &ret);
1035     if (fp_out) fclose(fp_out);
1036 
1037     free(fn_fai); free(fn_out); free(settings.library);  free(fn_un_out);
1038     sam_global_args_free(&ga);
1039     if ( header ) sam_hdr_destroy(header);
1040     if (settings.bed) bed_destroy(settings.bed);
1041     if (settings.rghash) {
1042         khint_t k;
1043         for (k = 0; k < kh_end(settings.rghash); ++k)
1044             if (kh_exist(settings.rghash, k)) free((char*)kh_key(settings.rghash, k));
1045         kh_destroy(str, settings.rghash);
1046     }
1047     if (settings.rnhash) {
1048         khint_t k;
1049         for (k = 0; k < kh_end(settings.rnhash); ++k)
1050             if (kh_exist(settings.rnhash, k)) free((char*)kh_key(settings.rnhash, k));
1051         kh_destroy(str, settings.rnhash);
1052     }
1053     if (settings.tvhash) {
1054         khint_t k;
1055         for (k = 0; k < kh_end(settings.tvhash); ++k)
1056             if (kh_exist(settings.tvhash, k)) free((char*)kh_key(settings.tvhash, k));
1057         kh_destroy(str, settings.tvhash);
1058     }
1059 
1060     if (settings.remove_aux_len) {
1061         free(settings.remove_aux);
1062     }
1063     if (settings.tag) {
1064         free(settings.tag);
1065     }
1066     if (settings.filter)
1067         hts_filter_free(settings.filter);
1068 
1069     if (p.pool)
1070         hts_tpool_destroy(p.pool);
1071 
1072     if (fn_out_idx)
1073         free(fn_out_idx);
1074     if (fn_un_out_idx)
1075         free(fn_un_out_idx);
1076     free(arg_list);
1077 
1078     if (settings.keep_tag)
1079         kh_destroy(aux_exists, settings.keep_tag);
1080     if (settings.remove_tag)
1081         kh_destroy(aux_exists, settings.remove_tag);
1082 
1083     return ret;
1084 }
1085 
usage(FILE * fp,int exit_status,int is_long_help)1086 static int usage(FILE *fp, int exit_status, int is_long_help)
1087 {
1088     fprintf(fp,
1089 "\n"
1090 "Usage: samtools view [options] <in.bam>|<in.sam>|<in.cram> [region ...]\n"
1091 "\n"
1092 
1093 "Output options:\n"
1094 "  -b, --bam                  Output BAM\n"
1095 "  -C, --cram                 Output CRAM (requires -T)\n"
1096 "  -1, --fast                 Use fast BAM compression (implies --bam)\n"
1097 "  -u, --uncompressed         Uncompressed BAM output (implies --bam)\n"
1098 "  -h, --with-header          Include header in SAM output\n"
1099 "  -H, --header-only          Print SAM header only (no alignments)\n"
1100 "      --no-header            Print SAM alignment records only [default]\n"
1101 "  -c, --count                Print only the count of matching records\n"
1102 "  -o, --output FILE          Write output to FILE [standard output]\n"
1103 "  -U, --unoutput FILE, --output-unselected FILE\n"
1104 "                             Output reads not selected by filters to FILE\n"
1105 "  -p, --unmap                Set flag to UNMAP on reads not selected\n"
1106 "                             then write to output file.\n"
1107 "Input options:\n"
1108 "  -t, --fai-reference FILE   FILE listing reference names and lengths\n"
1109 "  -M, --use-index            Use index and multi-region iterator for regions\n"
1110 "      --region[s]-file FILE  Use index to include only reads overlapping FILE\n"
1111 "  -X, --customized-index     Expect extra index file argument after <in.bam>\n"
1112 "\n"
1113 "Filtering options (Only include in output reads that...):\n"
1114 "  -L, --target[s]-file FILE  ...overlap (BED) regions in FILE\n"
1115 "  -r, --read-group STR       ...are in read group STR\n"
1116 "  -R, --read-group-file FILE ...are in a read group listed in FILE\n"
1117 "  -N, --qname-file FILE      ...whose read name is listed in FILE\n"
1118 "  -d, --tag STR1[:STR2]      ...have a tag STR1 (with associated value STR2)\n"
1119 "  -D, --tag-file STR:FILE    ...have a tag STR whose value is listed in FILE\n"
1120 "  -q, --min-MQ INT           ...have mapping quality >= INT\n"
1121 "  -l, --library STR          ...are in library STR\n"
1122 "  -m, --min-qlen INT         ...cover >= INT query bases (as measured via CIGAR)\n"
1123 "  -e, --expr STR             ...match the filter expression STR\n"
1124 "  -f, --require-flags FLAG   ...have all of the FLAGs present\n"             //   F&x == x
1125 "  -F, --excl[ude]-flags FLAG ...have none of the FLAGs present\n"            //   F&x == 0
1126 "  -G FLAG                    EXCLUDE reads with all of the FLAGs present\n"  // !(F&x == x)  TODO long option
1127 "      --subsample FLOAT      Keep only FLOAT fraction of templates/read pairs\n"
1128 "      --subsample-seed INT   Influence WHICH reads are kept in subsampling [0]\n"
1129 "  -s INT.FRAC                Same as --subsample 0.FRAC --subsample-seed INT\n"
1130 "\n"
1131 "Processing options:\n"
1132 "      --add-flags FLAG       Add FLAGs to reads\n"
1133 "      --remove-flags FLAG    Remove FLAGs from reads\n"
1134 "  -x, --remove-tag STR\n"
1135 "               Comma-separated read tags to strip (repeatable) [null]\n"
1136 "      --keep-tag STR\n"
1137 "               Comma-separated read tags to preserve (repeatable) [null].\n"
1138 "               Equivalent to \"-x ^STR\"\n"
1139 "  -B, --remove-B             Collapse the backward CIGAR operation\n"
1140 "\n"
1141 "General options:\n"
1142 "  -?, --help   Print long help, including note about region specification\n"
1143 "  -S           Ignored (input format is auto-detected)\n"
1144 "      --no-PG  Do not add a PG line\n");
1145 
1146     sam_global_opt_help(fp, "-.O.T@..");
1147     fprintf(fp, "\n");
1148 
1149     if (is_long_help)
1150         fprintf(fp,
1151 "Notes:\n"
1152 "\n"
1153 "1. This command now auto-detects the input format (BAM/CRAM/SAM).\n"
1154 "   Further control over the CRAM format can be specified by using the\n"
1155 "   --output-fmt-option, e.g. to specify the number of sequences per slice\n"
1156 "   and to use avoid reference based compression:\n"
1157 "\n"
1158 "\tsamtools view -C --output-fmt-option seqs_per_slice=5000 \\\n"
1159 "\t   --output-fmt-option no_ref -o out.cram in.bam\n"
1160 "\n"
1161 "   Options can also be specified as a comma separated list within the\n"
1162 "   --output-fmt value too.  For example this is equivalent to the above\n"
1163 "\n"
1164 "\tsamtools view --output-fmt cram,seqs_per_slice=5000,no_ref \\\n"
1165 "\t   -o out.cram in.bam\n"
1166 "\n"
1167 "2. The file supplied with `-t' is SPACE/TAB delimited with the first\n"
1168 "   two fields of each line consisting of the reference name and the\n"
1169 "   corresponding sequence length. The `.fai' file generated by \n"
1170 "   `samtools faidx' is suitable for use as this file. This may be an\n"
1171 "   empty file if reads are unaligned.\n"
1172 "\n"
1173 "3. SAM->BAM conversion:  samtools view -bT ref.fa in.sam.gz\n"
1174 "\n"
1175 "4. BAM->SAM conversion:  samtools view -h in.bam\n"
1176 "\n"
1177 "5. A region should be presented in one of the following formats:\n"
1178 "   `chr1', `chr2:1,000' and `chr3:1000-2,000'. When a region is\n"
1179 "   specified, the input alignment file must be a sorted and indexed\n"
1180 "   alignment (BAM/CRAM) file.\n"
1181 "\n"
1182 "6. Option `-u' is preferred over `-b' when the output is piped to\n"
1183 "   another samtools command.\n"
1184 "\n"
1185 "7. Option `-M`/`--use-index` causes overlaps with `-L` BED file regions and\n"
1186 "   command-line region arguments to be computed using the multi-region iterator\n"
1187 "   and an index. This increases speed, omits duplicates, and outputs the reads\n"
1188 "   as they are ordered in the input SAM/BAM/CRAM file.\n"
1189 "\n"
1190 "8. Options `-L`/`--target[s]-file` and `--region[s]-file` may not be used\n"
1191 "   together. `--region[s]-file FILE` is simply equivalent to `-M -L FILE`,\n"
1192 "   so using both causes one of the specified BED files to be ignored.\n"
1193 "\n");
1194 
1195     return exit_status;
1196 }
1197