1 #include "samtools.pysam.h"
2 
3 /*  bam_plcmd.c -- mpileup subcommand.
4 
5     Copyright (C) 2008-2015, 2019-2021 Genome Research Ltd.
6     Portions copyright (C) 2009-2012 Broad Institute.
7 
8     Author: Heng Li <lh3@sanger.ac.uk>
9 
10 Permission is hereby granted, free of charge, to any person obtaining a copy
11 of this software and associated documentation files (the "Software"), to deal
12 in the Software without restriction, including without limitation the rights
13 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14 copies of the Software, and to permit persons to whom the Software is
15 furnished to do so, subject to the following conditions:
16 
17 The above copyright notice and this permission notice shall be included in
18 all copies or substantial portions of the Software.
19 
20 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
23 THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
25 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
26 DEALINGS IN THE SOFTWARE.  */
27 
28 #include <config.h>
29 
30 #include <math.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <unistd.h>
34 #include <ctype.h>
35 #include <string.h>
36 #include <strings.h>
37 #include <limits.h>
38 #include <errno.h>
39 #include <sys/stat.h>
40 #include <getopt.h>
41 #include <inttypes.h>
42 #include <htslib/sam.h>
43 #include <htslib/faidx.h>
44 #include <htslib/kstring.h>
45 #include <htslib/klist.h>
46 #include <htslib/khash_str2int.h>
47 #include <htslib/cram.h>
48 #include "samtools.h"
49 #include "bedidx.h"
50 #include "sam_opts.h"
51 
52 #define dummy_free(p)
KLIST_INIT(auxlist,char *,dummy_free)53 KLIST_INIT(auxlist, char *, dummy_free)
54 
55 static inline int printw(int c, FILE *fp)
56 {
57     char buf[16];
58     int l, x;
59     if (c == 0) return fputc('0', fp);
60     for (l = 0, x = c < 0? -c : c; x > 0; x /= 10) buf[l++] = x%10 + '0';
61     if (c < 0) buf[l++] = '-';
62     buf[l] = 0;
63     for (x = 0; x < l/2; ++x) {
64         int y = buf[x]; buf[x] = buf[l-1-x]; buf[l-1-x] = y;
65     }
66     fputs(buf, fp);
67     return 0;
68 }
69 
pileup_seq(FILE * fp,const bam_pileup1_t * p,hts_pos_t pos,hts_pos_t ref_len,const char * ref,kstring_t * ks,int rev_del,int no_ins,int no_ins_mods,int no_del,int no_ends)70 static inline int pileup_seq(FILE *fp, const bam_pileup1_t *p, hts_pos_t pos,
71                              hts_pos_t ref_len, const char *ref, kstring_t *ks,
72                              int rev_del, int no_ins, int no_ins_mods,
73                              int no_del, int no_ends)
74 {
75     no_ins_mods |= no_ins;
76     int j;
77     hts_base_mod_state *m = p->cd.p;
78     if (!no_ends && p->is_head) {
79         putc('^', fp);
80         putc(p->b->core.qual > 93? 126 : p->b->core.qual + 33, fp);
81     }
82     if (!p->is_del) {
83         int c = p->qpos < p->b->core.l_qseq
84             ? seq_nt16_str[bam_seqi(bam_get_seq(p->b), p->qpos)]
85             : 'N';
86         if (ref) {
87             int rb = pos < ref_len? ref[pos] : 'N';
88             if (c == '=' || seq_nt16_table[c] == seq_nt16_table[rb]) c = bam_is_rev(p->b)? ',' : '.';
89             else c = bam_is_rev(p->b)? tolower(c) : toupper(c);
90         } else {
91             if (c == '=') c = bam_is_rev(p->b)? ',' : '.';
92             else c = bam_is_rev(p->b)? tolower(c) : toupper(c);
93         }
94         putc(c, fp);
95         if (m) {
96             int nm;
97             hts_base_mod mod[256];
98             if ((nm = bam_mods_at_qpos(p->b, p->qpos, m, mod, 256)) > 0) {
99                 putc('[', fp);
100                 int j;
101                 for (j = 0; j < nm && j < 256; j++) {
102                     char qual[20];
103                     if (mod[j].qual >= 0)
104                         sprintf(qual, "%d", mod[j].qual);
105                     else
106                         *qual = 0;
107                     if (mod[j].modified_base < 0)
108                         // ChEBI
109                         fprintf(fp, "%c(%d)%s", "+-"[mod[j].strand],
110                                 -mod[j].modified_base, qual);
111                     else
112                         fprintf(fp, "%c%c%s", "+-"[mod[j].strand],
113                                 mod[j].modified_base, qual);
114                 }
115                 putc(']', fp);
116             }
117         }
118     } else putc(p->is_refskip? (bam_is_rev(p->b)? '<' : '>') : ((bam_is_rev(p->b) && rev_del) ? '#' : '*'), fp);
119     int del_len = -p->indel;
120     if (p->indel > 0) {
121         int len = bam_plp_insertion_mod(p, m && !no_ins_mods ? m : NULL,
122                                         ks, &del_len);
123         if (len < 0) {
124             print_error("mpileup", "bam_plp_insertion() failed");
125             return -1;
126         }
127         if (no_ins < 2) {
128             putc('+', fp);
129             printw(len, fp);
130         }
131         if (!no_ins) {
132             if (bam_is_rev(p->b)) {
133                 char pad = rev_del ? '#' : '*';
134                 int in_mod = 0;
135                 for (j = 0; j < ks->l; j++) {
136                     if (ks->s[j] == '[') in_mod = 1;
137                     else if (ks->s[j] == ']') in_mod = 0;
138                     putc(ks->s[j] != '*'
139                          ? (in_mod ? ks->s[j] : tolower(ks->s[j]))
140                          : pad, fp);
141                 }
142             } else {
143                 int in_mod = 0;
144                 for (j = 0; j < ks->l; j++) {
145                     if (ks->s[j] == '[') in_mod = 1;
146                     if (ks->s[j] == ']') in_mod = 0;
147                     putc(in_mod ? ks->s[j] : toupper(ks->s[j]), fp);
148                 }
149             }
150         }
151     }
152     if (del_len > 0) {
153         if (no_del < 2)
154             printw(-del_len, fp);
155         if (!no_del) {
156             for (j = 1; j <= del_len; ++j) {
157                 int c = (ref && (int)pos+j < ref_len)? ref[pos+j] : 'N';
158                 putc(bam_is_rev(p->b)? tolower(c) : toupper(c), fp);
159             }
160         }
161     }
162     if (!no_ends && p->is_tail) putc('$', fp);
163     return 0;
164 }
165 
166 #include <assert.h>
167 #include "bam2bcf.h"
168 #include "sample.h"
169 
170 #define MPLP_BCF        1
171 #define MPLP_VCF        (1<<1)
172 #define MPLP_NO_COMP    (1<<2)
173 #define MPLP_NO_ORPHAN  (1<<3)
174 #define MPLP_REALN      (1<<4)
175 #define MPLP_NO_INDEL   (1<<5)
176 #define MPLP_REDO_BAQ   (1<<6)
177 #define MPLP_ILLUMINA13 (1<<7)
178 #define MPLP_IGNORE_RG  (1<<8)
179 #define MPLP_PER_SAMPLE (1<<9)
180 #define MPLP_SMART_OVERLAPS (1<<10)
181 
182 #define MPLP_PRINT_MAPQ_CHAR (1<<11)
183 #define MPLP_PRINT_QPOS  (1<<12)
184 #define MPLP_PRINT_QNAME (1<<13)
185 #define MPLP_PRINT_FLAG  (1<<14)
186 #define MPLP_PRINT_RNAME (1<<15)
187 #define MPLP_PRINT_POS   (1<<16)
188 #define MPLP_PRINT_MAPQ  (1<<17)
189 #define MPLP_PRINT_CIGAR (1<<18)
190 #define MPLP_PRINT_RNEXT (1<<19)
191 #define MPLP_PRINT_PNEXT (1<<20)
192 #define MPLP_PRINT_TLEN  (1<<21)
193 #define MPLP_PRINT_SEQ   (1<<22)
194 #define MPLP_PRINT_QUAL  (1<<23)
195 #define MPLP_PRINT_MODS  (1<<24)
196 #define MPLP_PRINT_QPOS5 (1<<25)
197 
198 #define MPLP_PRINT_LAST  (1<<26) // terminator for loop
199 
200 #define MPLP_MAX_DEPTH 8000
201 #define MPLP_MAX_INDEL_DEPTH 250
202 
203 typedef struct {
204     int min_mq, flag, min_baseQ, capQ_thres, max_depth, max_indel_depth, fmt_flag, all, rev_del;
205     int rflag_require, rflag_filter;
206     int openQ, extQ, tandemQ, min_support; // for indels
207     double min_frac; // for indels
208     char *reg, *pl_list, *fai_fname, *output_fname;
209     faidx_t *fai;
210     void *bed, *rghash, *auxlist;
211     int argc;
212     char **argv;
213     char sep, empty, no_ins, no_ins_mods, no_del, no_ends;
214     sam_global_args ga;
215 } mplp_conf_t;
216 
217 typedef struct {
218     char *ref[2];
219     int ref_id[2];
220     hts_pos_t ref_len[2];
221 } mplp_ref_t;
222 
223 #define MPLP_REF_INIT {{NULL,NULL},{-1,-1},{0,0}}
224 
225 typedef struct {
226     samFile *fp;
227     hts_itr_t *iter;
228     sam_hdr_t *h;
229     mplp_ref_t *ref;
230     const mplp_conf_t *conf;
231 } mplp_aux_t;
232 
233 typedef struct {
234     int n;
235     int *n_plp, *m_plp;
236     bam_pileup1_t **plp;
237 } mplp_pileup_t;
238 
build_auxlist(mplp_conf_t * conf,char * optstring)239 static int build_auxlist(mplp_conf_t *conf, char *optstring) {
240     if (!optstring)
241         return 0;
242 
243     void *colhash = khash_str2int_init();
244     if (!colhash)
245         return 1;
246 
247     struct active_cols {
248         char *name;
249         int supported;
250     };
251 
252     const struct active_cols colnames[11] = {
253             {"QNAME", 1}, {"FLAG", 1}, {"RNAME", 1}, {"POS", 1}, {"MAPQ", 1}, {"CIGAR", 0}, {"RNEXT", 1}, {"PNEXT", 1}, {"TLEN", 0}, {"SEQ", 0}, {"QUAL", 0}
254     };
255 
256     int i, f = MPLP_PRINT_QNAME, colno = 11;
257     for (i = 0; i < colno; i++, f <<= 1)
258         if (colnames[i].supported)
259             khash_str2int_set(colhash, colnames[i].name, f);
260 
261     conf->auxlist = kl_init(auxlist);
262     if (!conf->auxlist)
263         return 1;
264 
265     char *save_p;
266     char *tag = strtok_r(optstring, ",", &save_p);
267     while (tag) {
268         if (khash_str2int_get(colhash, tag, &f) == 0) {
269             conf->flag |= f;
270         } else {
271             if (strlen(tag) != 2) {
272                 fprintf(samtools_stderr, "[%s] tag '%s' has more than two characters or not supported\n", __func__, tag);
273             } else {
274                 char **tag_p = kl_pushp(auxlist, conf->auxlist);
275                 *tag_p = tag;
276             }
277         }
278         tag = strtok_r(NULL, ",", &save_p);
279     }
280 
281     khash_str2int_destroy(colhash);
282 
283     return 0;
284 }
285 
mplp_get_ref(mplp_aux_t * ma,int tid,char ** ref,hts_pos_t * ref_len)286 static int mplp_get_ref(mplp_aux_t *ma, int tid, char **ref, hts_pos_t *ref_len) {
287     mplp_ref_t *r = ma->ref;
288 
289     //printf("get ref %d {%d/%p, %d/%p}\n", tid, r->ref_id[0], r->ref[0], r->ref_id[1], r->ref[1]);
290 
291     if (!r || !ma->conf->fai) {
292         *ref = NULL;
293         return 0;
294     }
295 
296     // Do we need to reference count this so multiple mplp_aux_t can
297     // track which references are in use?
298     // For now we just cache the last two. Sufficient?
299     if (tid == r->ref_id[0]) {
300         *ref = r->ref[0];
301         *ref_len = r->ref_len[0];
302         return 1;
303     }
304     if (tid == r->ref_id[1]) {
305         // Last, swap over
306         int tmp_id;
307         hts_pos_t tmp_len;
308         tmp_id  = r->ref_id[0];  r->ref_id[0]  = r->ref_id[1];  r->ref_id[1]  = tmp_id;
309         tmp_len = r->ref_len[0]; r->ref_len[0] = r->ref_len[1]; r->ref_len[1] = tmp_len;
310 
311         char *tc;
312         tc = r->ref[0]; r->ref[0] = r->ref[1]; r->ref[1] = tc;
313         *ref = r->ref[0];
314         *ref_len = r->ref_len[0];
315         return 1;
316     }
317 
318     // New, so migrate to old and load new
319     free(r->ref[1]);
320     r->ref[1]     = r->ref[0];
321     r->ref_id[1]  = r->ref_id[0];
322     r->ref_len[1] = r->ref_len[0];
323 
324     r->ref_id[0] = tid;
325     r->ref[0] = faidx_fetch_seq64(ma->conf->fai,
326                                 sam_hdr_tid2name(ma->h, r->ref_id[0]),
327                                 0,
328                                 HTS_POS_MAX,
329                                 &r->ref_len[0]);
330 
331     if (!r->ref[0]) {
332         r->ref[0] = NULL;
333         r->ref_id[0] = -1;
334         r->ref_len[0] = 0;
335         *ref = NULL;
336         return 0;
337     }
338 
339     *ref = r->ref[0];
340     *ref_len = r->ref_len[0];
341     return 1;
342 }
343 
344 // Initialise and destroy the base modifier state data. This is called
345 // as each new read is added or removed from the pileups.
346 static
pileup_cd_create(void * data,const bam1_t * b,bam_pileup_cd * cd)347 int pileup_cd_create(void *data, const bam1_t *b, bam_pileup_cd *cd) {
348     int ret;
349     hts_base_mod_state *m = hts_base_mod_state_alloc();
350     ret = bam_parse_basemod(b, m);
351     cd->p = m;
352     return ret;
353 }
354 
355 static
pileup_cd_destroy(void * data,const bam1_t * b,bam_pileup_cd * cd)356 int pileup_cd_destroy(void *data, const bam1_t *b, bam_pileup_cd *cd) {
357     hts_base_mod_state_free(cd->p);
358     return 0;
359 }
360 
361 static void
print_empty_pileup(FILE * fp,const mplp_conf_t * conf,const char * tname,hts_pos_t pos,int n,const char * ref,hts_pos_t ref_len)362 print_empty_pileup(FILE *fp, const mplp_conf_t *conf, const char *tname,
363                    hts_pos_t pos, int n, const char *ref, hts_pos_t ref_len)
364 {
365     int i;
366     fprintf(fp, "%s\t%"PRIhts_pos"\t%c", tname, pos+1, (ref && pos < ref_len)? ref[pos] : 'N');
367     for (i = 0; i < n; ++i) {
368         fputs("\t0\t*\t*", fp);
369         int flag_value = MPLP_PRINT_MAPQ_CHAR;
370         while(flag_value < MPLP_PRINT_LAST) {
371             if (flag_value != MPLP_PRINT_MODS && (conf->flag & flag_value))
372                 fputs("\t*", fp);
373             flag_value <<= 1;
374         }
375         if (conf->auxlist) {
376             int t = 0;
377             while(t++ < ((klist_t(auxlist) *)conf->auxlist)->size)
378                 fputs("\t*", fp);
379         }
380     }
381     putc('\n', fp);
382 }
383 
mplp_func(void * data,bam1_t * b)384 static int mplp_func(void *data, bam1_t *b)
385 {
386     char *ref;
387     mplp_aux_t *ma = (mplp_aux_t*)data;
388     int ret, skip = 0;
389     hts_pos_t ref_len;
390 
391     do {
392         int has_ref;
393         ret = ma->iter? sam_itr_next(ma->fp, ma->iter, b) : sam_read1(ma->fp, ma->h, b);
394         if (ret < 0) break;
395         // The 'B' cigar operation is not part of the specification, considering as obsolete.
396         //  bam_remove_B(b);
397         if (b->core.tid < 0 || (b->core.flag&BAM_FUNMAP)) { // exclude unmapped reads
398             skip = 1;
399             continue;
400         }
401         if (ma->conf->rflag_require && !(ma->conf->rflag_require&b->core.flag)) { skip = 1; continue; }
402         if (ma->conf->rflag_filter && ma->conf->rflag_filter&b->core.flag) { skip = 1; continue; }
403         if (ma->conf->bed && ma->conf->all == 0) { // test overlap
404             skip = !bed_overlap(ma->conf->bed, sam_hdr_tid2name(ma->h, b->core.tid), b->core.pos, bam_endpos(b));
405             if (skip) continue;
406         }
407         if (ma->conf->rghash) { // exclude read groups
408             uint8_t *rg = bam_aux_get(b, "RG");
409             skip = (rg && khash_str2int_get(ma->conf->rghash, (const char*)(rg+1), NULL)==0);
410             if (skip) continue;
411         }
412         if (ma->conf->flag & MPLP_ILLUMINA13) {
413             int i;
414             uint8_t *qual = bam_get_qual(b);
415             for (i = 0; i < b->core.l_qseq; ++i)
416                 qual[i] = qual[i] > 31? qual[i] - 31 : 0;
417         }
418 
419         if (ma->conf->fai && b->core.tid >= 0) {
420             has_ref = mplp_get_ref(ma, b->core.tid, &ref, &ref_len);
421             if (has_ref && ref_len <= b->core.pos) { // exclude reads outside of the reference sequence
422                 fprintf(samtools_stderr,"[%s] Skipping because %"PRIhts_pos" is outside of %"PRIhts_pos" [ref:%d]\n",
423                         __func__, (int64_t) b->core.pos, ref_len, b->core.tid);
424                 skip = 1;
425                 continue;
426             }
427         } else {
428             has_ref = 0;
429         }
430 
431         skip = 0;
432         if (has_ref && (ma->conf->flag&MPLP_REALN)) sam_prob_realn(b, ref, ref_len, (ma->conf->flag & MPLP_REDO_BAQ)? 7 : 3);
433         if (has_ref && ma->conf->capQ_thres > 10) {
434             int q = sam_cap_mapq(b, ref, ref_len, ma->conf->capQ_thres);
435             if (q < 0) skip = 1;
436             else if (b->core.qual > q) b->core.qual = q;
437         }
438         if (b->core.qual < ma->conf->min_mq) skip = 1;
439         else if ((ma->conf->flag&MPLP_NO_ORPHAN) && (b->core.flag&BAM_FPAIRED) && !(b->core.flag&BAM_FPROPER_PAIR)) skip = 1;
440     } while (skip);
441     return ret;
442 }
443 
group_smpl(mplp_pileup_t * m,bam_sample_t * sm,kstring_t * buf,int n,char * const * fn,int * n_plp,const bam_pileup1_t ** plp,int ignore_rg)444 static void group_smpl(mplp_pileup_t *m, bam_sample_t *sm, kstring_t *buf,
445                        int n, char *const*fn, int *n_plp, const bam_pileup1_t **plp, int ignore_rg)
446 {
447     int i, j;
448     memset(m->n_plp, 0, m->n * sizeof(int));
449     for (i = 0; i < n; ++i) {
450         for (j = 0; j < n_plp[i]; ++j) {
451             const bam_pileup1_t *p = plp[i] + j;
452             uint8_t *q;
453             int id = -1;
454             q = ignore_rg? NULL : bam_aux_get(p->b, "RG");
455             if (q) id = bam_smpl_rg2smid(sm, fn[i], (char*)q+1, buf);
456             if (id < 0) id = bam_smpl_rg2smid(sm, fn[i], 0, buf);
457             if (id < 0 || id >= m->n) {
458                 assert(q); // otherwise a bug
459                 fprintf(samtools_stderr, "[%s] Read group %s used in file %s but absent from the header or an alignment missing read group.\n", __func__, (char*)q+1, fn[i]);
460                 samtools_exit(EXIT_FAILURE);
461             }
462             if (m->n_plp[id] == m->m_plp[id]) {
463                 m->m_plp[id] = m->m_plp[id]? m->m_plp[id]<<1 : 8;
464                 m->plp[id] = realloc(m->plp[id], sizeof(bam_pileup1_t) * m->m_plp[id]);
465             }
466             m->plp[id][m->n_plp[id]++] = *p;
467         }
468     }
469 }
470 
471 /*
472  * Performs pileup
473  * @param conf configuration for this pileup
474  * @param n number of files specified in fn
475  * @param fn filenames
476  * @param fn_idx index filenames
477  */
mpileup(mplp_conf_t * conf,int n,char ** fn,char ** fn_idx)478 static int mpileup(mplp_conf_t *conf, int n, char **fn, char **fn_idx)
479 {
480     extern void *bcf_call_add_rg(void *rghash, const char *hdtext, const char *list);
481     extern void bcf_call_del_rghash(void *rghash);
482     mplp_aux_t **data;
483     int i, tid, *n_plp, tid0 = 0, max_depth, max_indel_depth;
484     hts_pos_t pos, beg0 = 0, end0 = HTS_POS_MAX, ref_len;
485     const bam_pileup1_t **plp;
486     mplp_ref_t mp_ref = MPLP_REF_INIT;
487     bam_mplp_t iter;
488     sam_hdr_t *h = NULL; /* header of first file in input list */
489     char *ref;
490     void *rghash = NULL;
491     FILE *pileup_fp = NULL;
492 
493     bcf_callaux_t *bca = NULL;
494     bcf_callret1_t *bcr = NULL;
495     bcf_call_t bc;
496     htsFile *bcf_fp = NULL;
497     bcf_hdr_t *bcf_hdr = NULL;
498 
499     bam_sample_t *sm = NULL;
500     kstring_t buf;
501     mplp_pileup_t gplp;
502 
503     memset(&gplp, 0, sizeof(mplp_pileup_t));
504     memset(&buf, 0, sizeof(kstring_t));
505     memset(&bc, 0, sizeof(bcf_call_t));
506     data = calloc(n, sizeof(mplp_aux_t*));
507     plp = calloc(n, sizeof(bam_pileup1_t*));
508     n_plp = calloc(n, sizeof(int));
509     sm = bam_smpl_init();
510 
511     if (n == 0) {
512         fprintf(samtools_stderr,"[%s] no input file/data given\n", __func__);
513         samtools_exit(EXIT_FAILURE);
514     }
515 
516     // read the header of each file in the list and initialize data
517     refs_t *refs = NULL;
518     for (i = 0; i < n; ++i) {
519         sam_hdr_t *h_tmp;
520         data[i] = calloc(1, sizeof(mplp_aux_t));
521         data[i]->fp = sam_open_format(fn[i], "rb", &conf->ga.in);
522         if ( !data[i]->fp )
523         {
524             fprintf(samtools_stderr, "[%s] failed to open %s: %s\n", __func__, fn[i], strerror(errno));
525             samtools_exit(EXIT_FAILURE);
526         }
527         if (hts_set_opt(data[i]->fp, CRAM_OPT_DECODE_MD, 0)) {
528             fprintf(samtools_stderr, "Failed to set CRAM_OPT_DECODE_MD value\n");
529             samtools_exit(EXIT_FAILURE);
530         }
531 
532         if (!refs && conf->fai_fname) {
533             if (hts_set_fai_filename(data[i]->fp, conf->fai_fname) != 0) {
534                 fprintf(samtools_stderr, "[%s] failed to process %s: %s\n",
535                         __func__, conf->fai_fname, strerror(errno));
536                 samtools_exit(EXIT_FAILURE);
537             }
538             refs = cram_get_refs(data[i]->fp);
539         } else if (conf->fai_fname) {
540             if (hts_set_opt(data[i]->fp, CRAM_OPT_SHARED_REF, refs) != 0) {
541                 fprintf(samtools_stderr, "[%s] failed to process %s: %s\n",
542                         __func__, conf->fai_fname, strerror(errno));
543                 samtools_exit(EXIT_FAILURE);
544             }
545         }
546 
547         data[i]->conf = conf;
548         data[i]->ref = &mp_ref;
549         h_tmp = sam_hdr_read(data[i]->fp);
550         if ( !h_tmp ) {
551             fprintf(samtools_stderr,"[%s] fail to read the header of %s\n", __func__, fn[i]);
552             samtools_exit(EXIT_FAILURE);
553         }
554         bam_smpl_add(sm, fn[i], (conf->flag&MPLP_IGNORE_RG)? 0 : sam_hdr_str(h_tmp));
555         if (conf->flag & MPLP_BCF) {
556             // Collect read group IDs with PL (platform) listed in pl_list (note: fragile, strstr search)
557             rghash = bcf_call_add_rg(rghash, sam_hdr_str(h_tmp), conf->pl_list);
558         }
559         if (conf->reg) {
560             hts_idx_t *idx = NULL;
561             // If index filename has not been specfied, look in BAM folder
562             if (fn_idx != NULL)  {
563                 idx = sam_index_load2(data[i]->fp, fn[i], fn_idx[i]);
564             } else {
565                 idx = sam_index_load(data[i]->fp, fn[i]);
566             }
567 
568             if (idx == NULL) {
569                 fprintf(samtools_stderr, "[%s] fail to load index for %s\n", __func__, fn[i]);
570                 samtools_exit(EXIT_FAILURE);
571             }
572             if ( (data[i]->iter=sam_itr_querys(idx, h_tmp, conf->reg)) == 0) {
573                 fprintf(samtools_stderr, "[E::%s] fail to parse region '%s' with %s\n", __func__, conf->reg, fn[i]);
574                 samtools_exit(EXIT_FAILURE);
575             }
576             if (i == 0) beg0 = data[i]->iter->beg, end0 = data[i]->iter->end, tid0 = data[i]->iter->tid;
577             hts_idx_destroy(idx);
578         }
579         else
580             data[i]->iter = NULL;
581 
582         if (i == 0) h = data[i]->h = h_tmp; // save the header of the first file
583         else {
584             // FIXME: check consistency between h and h_tmp
585             sam_hdr_destroy(h_tmp);
586 
587             // we store only the first file's header; it's (alleged to be)
588             // compatible with the i-th file's target_name lookup needs
589             data[i]->h = h;
590         }
591     }
592     fprintf(samtools_stderr, "[%s] %d samples in %d input files\n", __func__, sm->n, n);
593     if (conf->flag & MPLP_BCF)
594     {
595         const char *mode;
596         // allocate data storage proportionate to number of samples being studied sm->n
597         gplp.n = sm->n;
598         gplp.n_plp = calloc(sm->n, sizeof(int));
599         gplp.m_plp = calloc(sm->n, sizeof(int));
600         gplp.plp = calloc(sm->n, sizeof(bam_pileup1_t*));
601 
602         // write the VCF header
603 
604         if ( conf->flag & MPLP_VCF )
605             mode = (conf->flag&MPLP_NO_COMP)? "wu" : "wz";   // uncompressed VCF or compressed VCF
606         else
607             mode = (conf->flag&MPLP_NO_COMP)? "wub" : "wb";  // uncompressed BCF or compressed BCF
608 
609         bcf_fp = bcf_open(conf->output_fname? conf->output_fname : "-", mode);
610         if (bcf_fp == NULL) {
611             fprintf(samtools_stderr, "[%s] failed to write to %s: %s\n", __func__, conf->output_fname? conf->output_fname : "standard output", strerror(errno));
612             samtools_exit(EXIT_FAILURE);
613         }
614         autoflush_if_stdout(bcf_fp, conf->output_fname);
615 
616         // BCF header creation
617         bcf_hdr = bcf_hdr_init("w");
618         kstring_t str = {0,0,NULL};
619 
620         ksprintf(&str, "##samtoolsVersion=%s+htslib-%s\n",samtools_version(),hts_version());
621         bcf_hdr_append(bcf_hdr, str.s);
622 
623         str.l = 0;
624         ksprintf(&str, "##samtoolsCommand=samtools mpileup");
625         for (i=1; i<conf->argc; i++) ksprintf(&str, " %s", conf->argv[i]);
626         kputc('\n', &str);
627         bcf_hdr_append(bcf_hdr, str.s);
628 
629         if (conf->fai_fname)
630         {
631             str.l = 0;
632             ksprintf(&str, "##reference=file://%s\n", conf->fai_fname);
633             bcf_hdr_append(bcf_hdr, str.s);
634         }
635 
636         // Translate BAM @SQ tags to BCF ##contig tags
637         // todo: use/write new BAM header manipulation routines, fill also UR, M5
638         for (i=0; i < sam_hdr_nref(h); i++)
639         {
640             str.l = 0;
641             ksprintf(&str, "##contig=<ID=%s,length=%"PRId64">", sam_hdr_tid2name(h, i), (int64_t) sam_hdr_tid2len(h, i));
642             bcf_hdr_append(bcf_hdr, str.s);
643         }
644         free(str.s);
645         bcf_hdr_append(bcf_hdr,"##ALT=<ID=*,Description=\"Represents allele(s) other than observed.\">");
646         bcf_hdr_append(bcf_hdr,"##INFO=<ID=INDEL,Number=0,Type=Flag,Description=\"Indicates that the variant is an INDEL.\">");
647         bcf_hdr_append(bcf_hdr,"##INFO=<ID=IDV,Number=1,Type=Integer,Description=\"Maximum number of reads supporting an indel\">");
648         bcf_hdr_append(bcf_hdr,"##INFO=<ID=IMF,Number=1,Type=Float,Description=\"Maximum fraction of reads supporting an indel\">");
649         bcf_hdr_append(bcf_hdr,"##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Raw read depth\">");
650         bcf_hdr_append(bcf_hdr,"##INFO=<ID=VDB,Number=1,Type=Float,Description=\"Variant Distance Bias for filtering splice-site artefacts in RNA-seq data (bigger is better)\",Version=\"3\">");
651         bcf_hdr_append(bcf_hdr,"##INFO=<ID=RPB,Number=1,Type=Float,Description=\"Mann-Whitney U test of Read Position Bias (bigger is better)\">");
652         bcf_hdr_append(bcf_hdr,"##INFO=<ID=MQB,Number=1,Type=Float,Description=\"Mann-Whitney U test of Mapping Quality Bias (bigger is better)\">");
653         bcf_hdr_append(bcf_hdr,"##INFO=<ID=BQB,Number=1,Type=Float,Description=\"Mann-Whitney U test of Base Quality Bias (bigger is better)\">");
654         bcf_hdr_append(bcf_hdr,"##INFO=<ID=MQSB,Number=1,Type=Float,Description=\"Mann-Whitney U test of Mapping Quality vs Strand Bias (bigger is better)\">");
655 #if CDF_MWU_TESTS
656         bcf_hdr_append(bcf_hdr,"##INFO=<ID=RPB2,Number=1,Type=Float,Description=\"Mann-Whitney U test of Read Position Bias [CDF] (bigger is better)\">");
657         bcf_hdr_append(bcf_hdr,"##INFO=<ID=MQB2,Number=1,Type=Float,Description=\"Mann-Whitney U test of Mapping Quality Bias [CDF] (bigger is better)\">");
658         bcf_hdr_append(bcf_hdr,"##INFO=<ID=BQB2,Number=1,Type=Float,Description=\"Mann-Whitney U test of Base Quality Bias [CDF] (bigger is better)\">");
659         bcf_hdr_append(bcf_hdr,"##INFO=<ID=MQSB2,Number=1,Type=Float,Description=\"Mann-Whitney U test of Mapping Quality vs Strand Bias [CDF] (bigger is better)\">");
660 #endif
661         bcf_hdr_append(bcf_hdr,"##INFO=<ID=SGB,Number=1,Type=Float,Description=\"Segregation based metric.\">");
662         bcf_hdr_append(bcf_hdr,"##INFO=<ID=MQ0F,Number=1,Type=Float,Description=\"Fraction of MQ0 reads (smaller is better)\">");
663         bcf_hdr_append(bcf_hdr,"##INFO=<ID=I16,Number=16,Type=Float,Description=\"Auxiliary tag used for calling, see description of bcf_callret1_t in bam2bcf.h\">");
664         bcf_hdr_append(bcf_hdr,"##INFO=<ID=QS,Number=R,Type=Float,Description=\"Auxiliary tag used for calling\">");
665         bcf_hdr_append(bcf_hdr,"##FORMAT=<ID=PL,Number=G,Type=Integer,Description=\"List of Phred-scaled genotype likelihoods\">");
666         if ( conf->fmt_flag&B2B_FMT_DP )
667             bcf_hdr_append(bcf_hdr,"##FORMAT=<ID=DP,Number=1,Type=Integer,Description=\"Number of high-quality bases\">");
668         if ( conf->fmt_flag&B2B_FMT_DV )
669             bcf_hdr_append(bcf_hdr,"##FORMAT=<ID=DV,Number=1,Type=Integer,Description=\"Number of high-quality non-reference bases\">");
670         if ( conf->fmt_flag&B2B_FMT_DPR )
671             bcf_hdr_append(bcf_hdr,"##FORMAT=<ID=DPR,Number=R,Type=Integer,Description=\"Number of high-quality bases observed for each allele\">");
672         if ( conf->fmt_flag&B2B_INFO_DPR )
673             bcf_hdr_append(bcf_hdr,"##INFO=<ID=DPR,Number=R,Type=Integer,Description=\"Number of high-quality bases observed for each allele\">");
674         if ( conf->fmt_flag&B2B_FMT_DP4 )
675             bcf_hdr_append(bcf_hdr,"##FORMAT=<ID=DP4,Number=4,Type=Integer,Description=\"Number of high-quality ref-fwd, ref-reverse, alt-fwd and alt-reverse bases\">");
676         if ( conf->fmt_flag&B2B_FMT_SP )
677             bcf_hdr_append(bcf_hdr,"##FORMAT=<ID=SP,Number=1,Type=Integer,Description=\"Phred-scaled strand bias P-value\">");
678         if ( conf->fmt_flag&B2B_FMT_AD )
679             bcf_hdr_append(bcf_hdr,"##FORMAT=<ID=AD,Number=R,Type=Integer,Description=\"Allelic depths\">");
680         if ( conf->fmt_flag&B2B_FMT_ADF )
681             bcf_hdr_append(bcf_hdr,"##FORMAT=<ID=ADF,Number=R,Type=Integer,Description=\"Allelic depths on the forward strand\">");
682         if ( conf->fmt_flag&B2B_FMT_ADR )
683             bcf_hdr_append(bcf_hdr,"##FORMAT=<ID=ADR,Number=R,Type=Integer,Description=\"Allelic depths on the reverse strand\">");
684         if ( conf->fmt_flag&B2B_INFO_AD )
685             bcf_hdr_append(bcf_hdr,"##INFO=<ID=AD,Number=R,Type=Integer,Description=\"Total allelic depths\">");
686         if ( conf->fmt_flag&B2B_INFO_ADF )
687             bcf_hdr_append(bcf_hdr,"##INFO=<ID=ADF,Number=R,Type=Integer,Description=\"Total allelic depths on the forward strand\">");
688         if ( conf->fmt_flag&B2B_INFO_ADR )
689             bcf_hdr_append(bcf_hdr,"##INFO=<ID=ADR,Number=R,Type=Integer,Description=\"Total allelic depths on the reverse strand\">");
690 
691         for (i=0; i<sm->n; i++)
692             bcf_hdr_add_sample(bcf_hdr, sm->smpl[i]);
693         bcf_hdr_add_sample(bcf_hdr, NULL);
694         if (bcf_hdr_write(bcf_fp, bcf_hdr) != 0) {
695             print_error_errno("mpileup", "Failed to write VCF/BCF header to \"%s\"",
696                               conf->output_fname? conf->output_fname : "standard output");
697             samtools_exit(EXIT_FAILURE);
698         }
699         // End of BCF header creation
700 
701         // Initialise the calling algorithm
702         bca = bcf_call_init(-1., conf->min_baseQ);
703         bcr = calloc(sm->n, sizeof(bcf_callret1_t));
704         bca->rghash = rghash;
705         bca->openQ = conf->openQ, bca->extQ = conf->extQ, bca->tandemQ = conf->tandemQ;
706         bca->min_frac = conf->min_frac;
707         bca->min_support = conf->min_support;
708         bca->per_sample_flt = conf->flag & MPLP_PER_SAMPLE;
709 
710         bc.bcf_hdr = bcf_hdr;
711         bc.n = sm->n;
712         bc.PL = malloc(15 * sm->n * sizeof(*bc.PL));
713         if (conf->fmt_flag)
714         {
715             assert( sizeof(float)==sizeof(int32_t) );
716             bc.DP4 = malloc(sm->n * sizeof(int32_t) * 4);
717             bc.fmt_arr = malloc(sm->n * sizeof(float)); // all fmt_flag fields
718             if ( conf->fmt_flag&(B2B_INFO_DPR|B2B_FMT_DPR|B2B_INFO_AD|B2B_INFO_ADF|B2B_INFO_ADR|B2B_FMT_AD|B2B_FMT_ADF|B2B_FMT_ADR) )
719             {
720                 // first B2B_MAX_ALLELES fields for total numbers, the rest per-sample
721                 bc.ADR = (int32_t*) malloc((sm->n+1)*B2B_MAX_ALLELES*sizeof(int32_t));
722                 bc.ADF = (int32_t*) malloc((sm->n+1)*B2B_MAX_ALLELES*sizeof(int32_t));
723                 for (i=0; i<sm->n; i++)
724                 {
725                     bcr[i].ADR = bc.ADR + (i+1)*B2B_MAX_ALLELES;
726                     bcr[i].ADF = bc.ADF + (i+1)*B2B_MAX_ALLELES;
727                 }
728             }
729         }
730     }
731     else {
732         pileup_fp = conf->output_fname? fopen(conf->output_fname, "w") : samtools_stdout;
733 
734         if (pileup_fp == NULL) {
735             fprintf(samtools_stderr, "[%s] failed to write to %s: %s\n", __func__, conf->output_fname, strerror(errno));
736             samtools_exit(EXIT_FAILURE);
737         }
738     }
739 
740     // init pileup
741     iter = bam_mplp_init(n, mplp_func, (void**)data);
742     if (conf->flag & MPLP_PRINT_MODS) {
743         bam_mplp_constructor(iter, pileup_cd_create);
744         bam_mplp_destructor(iter, pileup_cd_destroy);
745     }
746     if ( conf->flag & MPLP_SMART_OVERLAPS ) bam_mplp_init_overlaps(iter);
747     if ( !conf->max_depth ) {
748         max_depth = INT_MAX;
749         fprintf(samtools_stderr, "[%s] Max depth set to maximum value (%d)\n", __func__, INT_MAX);
750     } else {
751         max_depth = conf->max_depth;
752         if ( max_depth * n > 1<<20 )
753             fprintf(samtools_stderr, "[%s] Combined max depth is above 1M. Potential memory hog!\n", __func__);
754     }
755 
756     // Only used when writing BCF
757     max_indel_depth = conf->max_indel_depth * sm->n;
758     bam_mplp_set_maxcnt(iter, max_depth);
759     bcf1_t *bcf_rec = bcf_init1();
760     int ret;
761     int last_tid = -1;
762     hts_pos_t last_pos = -1;
763 
764     // begin pileup
765     while ( (ret=bam_mplp64_auto(iter, &tid, &pos, n_plp, plp)) > 0) {
766         if (conf->reg && (pos < beg0 || pos >= end0)) continue; // out of the region requested
767         mplp_get_ref(data[0], tid, &ref, &ref_len);
768         //printf("tid=%d len=%d ref=%p/%s\n", tid, ref_len, ref, ref);
769         if (conf->flag & MPLP_BCF) {
770             int total_depth, _ref0, ref16;
771             if (conf->bed && tid >= 0 && !bed_overlap(conf->bed, sam_hdr_tid2name(h, tid), pos, pos+1)) continue;
772             for (i = total_depth = 0; i < n; ++i) total_depth += n_plp[i];
773             group_smpl(&gplp, sm, &buf, n, fn, n_plp, plp, conf->flag & MPLP_IGNORE_RG);
774             _ref0 = (ref && pos < ref_len)? ref[pos] : 'N';
775             ref16 = seq_nt16_table[_ref0];
776             bcf_callaux_clean(bca, &bc);
777             for (i = 0; i < gplp.n; ++i)
778                 bcf_call_glfgen(gplp.n_plp[i], gplp.plp[i], ref16, bca, bcr + i);
779             bc.tid = tid; bc.pos = pos;
780             bcf_call_combine(gplp.n, bcr, bca, ref16, &bc);
781             bcf_clear1(bcf_rec);
782             bcf_call2bcf(&bc, bcf_rec, bcr, conf->fmt_flag, 0, 0);
783             if (bcf_write1(bcf_fp, bcf_hdr, bcf_rec) != 0) {
784                 print_error_errno("mpileup", "Failed to write VCF/BCF record to \"%s\"",
785                                   conf->output_fname?conf->output_fname:"standard output");
786                 samtools_exit(EXIT_FAILURE);
787             }
788             // call indels; todo: subsampling with total_depth>max_indel_depth instead of ignoring?
789             if (!(conf->flag&MPLP_NO_INDEL) && total_depth < max_indel_depth && bcf_call_gap_prep(gplp.n, gplp.n_plp, gplp.plp, pos, bca, ref, rghash) >= 0)
790             {
791                 bcf_callaux_clean(bca, &bc);
792                 for (i = 0; i < gplp.n; ++i)
793                     bcf_call_glfgen(gplp.n_plp[i], gplp.plp[i], -1, bca, bcr + i);
794                 if (bcf_call_combine(gplp.n, bcr, bca, -1, &bc) >= 0) {
795                     bcf_clear1(bcf_rec);
796                     bcf_call2bcf(&bc, bcf_rec, bcr, conf->fmt_flag, bca, ref);
797                     if (bcf_write1(bcf_fp, bcf_hdr, bcf_rec) != 0) {
798                         print_error_errno("mpileup", "Failed to write VCF/BCF record to \"%s\"",
799                                           conf->output_fname?conf->output_fname:"standard output");
800                         samtools_exit(EXIT_FAILURE);
801                     }
802                 }
803             }
804         } else {
805             if (conf->all) {
806                 // Deal with missing portions of previous tids
807                 while (tid > last_tid) {
808                     if (last_tid >= 0 && !conf->reg) {
809                         while (++last_pos < sam_hdr_tid2len(h, last_tid)) {
810                             if (conf->bed && bed_overlap(conf->bed, sam_hdr_tid2name(h, last_tid), last_pos, last_pos + 1) == 0)
811                                 continue;
812                             print_empty_pileup(pileup_fp, conf, sam_hdr_tid2name(h, last_tid), last_pos, n, ref, ref_len);
813                         }
814                     }
815                     last_tid++;
816                     last_pos = -1;
817                     if (conf->all < 2)
818                         break;
819                 }
820             }
821             if (conf->all) {
822                 // Deal with missing portion of current tid
823                 while (++last_pos < pos) {
824                     if (conf->reg && last_pos < beg0) continue; // out of range; skip
825                     if (conf->bed && bed_overlap(conf->bed, sam_hdr_tid2name(h, tid), last_pos, last_pos + 1) == 0)
826                         continue;
827                     print_empty_pileup(pileup_fp, conf, sam_hdr_tid2name(h, tid), last_pos, n, ref, ref_len);
828                 }
829                 last_tid = tid;
830                 last_pos = pos;
831             }
832             if (conf->bed && tid >= 0 && !bed_overlap(conf->bed, sam_hdr_tid2name(h, tid), pos, pos+1)) continue;
833 
834             fprintf(pileup_fp, "%s\t%"PRIhts_pos"\t%c", sam_hdr_tid2name(h, tid), pos + 1, (ref && pos < ref_len)? ref[pos] : 'N');
835             for (i = 0; i < n; ++i) {
836                 int j, cnt;
837                 for (j = cnt = 0; j < n_plp[i]; ++j) {
838                     const bam_pileup1_t *p = plp[i] + j;
839                     int c = p->qpos < p->b->core.l_qseq
840                              ? bam_get_qual(p->b)[p->qpos]
841                              : 0;
842                     if (c >= conf->min_baseQ) ++cnt;
843                 }
844                 fprintf(pileup_fp, "\t%d\t", cnt);
845                 if (n_plp[i] == 0) {
846                     fputs("*\t*", pileup_fp);
847                     int flag_value = MPLP_PRINT_MAPQ_CHAR;
848                     while(flag_value < MPLP_PRINT_LAST) {
849                         if (flag_value != MPLP_PRINT_MODS
850                             && (conf->flag & flag_value))
851                             fputs("\t*", pileup_fp);
852                         flag_value <<= 1;
853                     }
854                     if (conf->auxlist) {
855                         int t = 0;
856                         while(t++ < ((klist_t(auxlist) *)conf->auxlist)->size)
857                             fputs("\t*", pileup_fp);
858                     }
859                 } else {
860                     int n = 0;
861                     kstring_t ks = KS_INITIALIZE;
862                     for (j = 0; j < n_plp[i]; ++j) {
863                         const bam_pileup1_t *p = plp[i] + j;
864                         int c = p->qpos < p->b->core.l_qseq
865                             ? bam_get_qual(p->b)[p->qpos]
866                             : 0;
867                         if (c >= conf->min_baseQ) {
868                             n++;
869                             if (pileup_seq(pileup_fp, plp[i] + j, pos, ref_len,
870                                            ref, &ks, conf->rev_del,
871                                            conf->no_ins, conf->no_ins_mods,
872                                            conf->no_del, conf->no_ends) < 0) {
873                                 ret = 1;
874                                 goto fail;
875                             }
876                         }
877                     }
878                     if (!n) putc('*', pileup_fp);
879 
880                     /* Print base qualities */
881                     n = 0;
882                     ks_free(&ks);
883                     putc('\t', pileup_fp);
884                     for (j = 0; j < n_plp[i]; ++j) {
885                         const bam_pileup1_t *p = plp[i] + j;
886                         int c = p->qpos < p->b->core.l_qseq
887                             ? bam_get_qual(p->b)[p->qpos]
888                             : 0;
889                         if (c >= conf->min_baseQ) {
890                             c = c + 33 < 126? c + 33 : 126;
891                             putc(c, pileup_fp);
892                             n++;
893                         }
894                     }
895                     if (!n) putc('*', pileup_fp);
896 
897                     /* Print selected columns */
898                     int flag_value = MPLP_PRINT_MAPQ_CHAR;
899                     while(flag_value < MPLP_PRINT_LAST) {
900                         if (flag_value != MPLP_PRINT_MODS
901                             && (conf->flag & flag_value)) {
902                             n = 0;
903                             putc('\t', pileup_fp);
904                             for (j = 0; j < n_plp[i]; ++j) {
905                                 const bam_pileup1_t *p = &plp[i][j];
906                                 int c = p->qpos < p->b->core.l_qseq
907                                     ? bam_get_qual(p->b)[p->qpos]
908                                     : 0;
909                                 if ( c < conf->min_baseQ ) continue;
910                                 if (n > 0 && flag_value != MPLP_PRINT_MAPQ_CHAR) putc(',', pileup_fp);
911                                 n++;
912 
913                                 switch (flag_value) {
914                                 case MPLP_PRINT_MAPQ_CHAR:
915                                     c = p->b->core.qual + 33;
916                                     if (c > 126) c = 126;
917                                     putc(c, pileup_fp);
918                                     break;
919                                 case MPLP_PRINT_QPOS:
920                                     // query position in current orientation
921                                     fprintf(pileup_fp, "%d", p->qpos + 1);
922                                     break;
923                                 case MPLP_PRINT_QPOS5: {
924                                     // query position in 5' to 3' orientation
925                                     int pos5 = bam_is_rev(p->b)
926                                         ? p->b->core.l_qseq-p->qpos + p->is_del
927                                         : p->qpos + 1;
928                                     fprintf(pileup_fp, "%d", pos5);
929                                     break;
930                                 }
931                                 case MPLP_PRINT_QNAME:
932                                     fputs(bam_get_qname(p->b), pileup_fp);
933                                     break;
934                                 case MPLP_PRINT_FLAG:
935                                     fprintf(pileup_fp, "%d", p->b->core.flag);
936                                     break;
937                                 case MPLP_PRINT_RNAME:
938                                     if (p->b->core.tid >= 0)
939                                         fputs(sam_hdr_tid2name(h, p->b->core.tid), pileup_fp);
940                                     else
941                                         putc('*', pileup_fp);
942                                     break;
943                                 case MPLP_PRINT_POS:
944                                     fprintf(pileup_fp, "%"PRId64, (int64_t) p->b->core.pos + 1);
945                                     break;
946                                 case MPLP_PRINT_MAPQ:
947                                     fprintf(pileup_fp, "%d", p->b->core.qual);
948                                     break;
949                                 case MPLP_PRINT_RNEXT:
950                                     if (p->b->core.mtid >= 0)
951                                         fputs(sam_hdr_tid2name(h, p->b->core.mtid), pileup_fp);
952                                     else
953                                         putc('*', pileup_fp);
954                                     break;
955                                 case MPLP_PRINT_PNEXT:
956                                     fprintf(pileup_fp, "%"PRId64, (int64_t) p->b->core.mpos + 1);
957                                     break;
958                                 }
959                             }
960                             if (!n) putc('*', pileup_fp);
961                         }
962                         flag_value <<= 1;
963                     }
964 
965                     /* Print selected tags */
966                     klist_t(auxlist) *auxlist_p = ((klist_t(auxlist) *)conf->auxlist);
967                     if (auxlist_p && auxlist_p->size) {
968                         kliter_t(auxlist) *aux;
969                         for (aux = kl_begin(auxlist_p); aux != kl_end(auxlist_p); aux = kl_next(aux)) {
970                             n = 0;
971                             putc('\t', pileup_fp);
972                             for (j = 0; j < n_plp[i]; ++j) {
973                                 const bam_pileup1_t *p = &plp[i][j];
974                                 int c = p->qpos < p->b->core.l_qseq
975                                     ? bam_get_qual(p->b)[p->qpos]
976                                     : 0;
977                                 if ( c < conf->min_baseQ ) continue;
978 
979                                 if (n > 0) putc(conf->sep, pileup_fp);
980                                 n++;
981                                 uint8_t* tag_u = bam_aux_get(p->b, kl_val(aux));
982                                 if (!tag_u) {
983                                     putc(conf->empty , pileup_fp);
984                                     continue;
985                                 }
986 
987                                 /* Tag value is string */
988                                 if (*tag_u == 'Z' || *tag_u == 'H') {
989                                     char *tag_s = bam_aux2Z(tag_u);
990                                     if (!tag_s) continue;
991                                     fputs(tag_s, pileup_fp);
992                                 }
993 
994                                 /* Tag value is integer */
995                                 if (*tag_u == 'I' || *tag_u == 'i' || *tag_u == 'C' || *tag_u == 'c' || *tag_u == 'S' || *tag_u == 's') {
996                                     int64_t tag_i = bam_aux2i(tag_u);
997                                     fprintf(pileup_fp, "%" PRId64 "", tag_i);
998                                 }
999 
1000                                 /* Tag value is float */
1001                                 if (*tag_u == 'd' || *tag_u == 'f') {
1002                                     double tag_f = bam_aux2f(tag_u);
1003                                     fprintf(pileup_fp, "%lf", tag_f);
1004                                 }
1005 
1006                                 /* Tag value is character */
1007                                 if (*tag_u == 'A') {
1008                                     char tag_c = bam_aux2A(tag_u);
1009                                     putc(tag_c, pileup_fp);
1010                                 }
1011                             }
1012                             if (!n) putc('*', pileup_fp);
1013                         }
1014                     }
1015                 }
1016             }
1017             putc('\n', pileup_fp);
1018         }
1019     }
1020 
1021     if (ret < 0) {
1022         print_error("mpileup", "error reading from input file");
1023         ret = EXIT_FAILURE;
1024         goto fail;
1025     }
1026 
1027     if (conf->all && !(conf->flag & MPLP_BCF)) {
1028         // Handle terminating region
1029         if (last_tid < 0 && conf->reg && conf->all > 1) {
1030             last_tid = tid0;
1031             last_pos = beg0-1;
1032             mplp_get_ref(data[0], tid0, &ref, &ref_len);
1033         }
1034        while (last_tid >= 0 && last_tid < sam_hdr_nref(h)) {
1035             while (++last_pos < sam_hdr_tid2len(h, last_tid)) {
1036                 if (last_pos >= end0) break;
1037                 if (conf->bed && bed_overlap(conf->bed, sam_hdr_tid2name(h, last_tid), last_pos, last_pos + 1) == 0)
1038                     continue;
1039                 print_empty_pileup(pileup_fp, conf, sam_hdr_tid2name(h, last_tid), last_pos, n, ref, ref_len);
1040             }
1041             last_tid++;
1042             last_pos = -1;
1043             if (conf->all < 2 || conf->reg)
1044                 break;
1045         }
1046     }
1047 
1048 fail:
1049     // clean up
1050     free(bc.tmp.s);
1051     bcf_destroy1(bcf_rec);
1052     if (bcf_fp)
1053     {
1054         release_autoflush(bcf_fp);
1055         hts_close(bcf_fp);
1056         bcf_hdr_destroy(bcf_hdr);
1057         bcf_call_destroy(bca);
1058         free(bc.PL);
1059         free(bc.DP4);
1060         free(bc.ADR);
1061         free(bc.ADF);
1062         free(bc.fmt_arr);
1063         free(bcr);
1064     }
1065     if (pileup_fp && conf->output_fname) fclose(pileup_fp);
1066     bam_smpl_destroy(sm); free(buf.s);
1067     for (i = 0; i < gplp.n; ++i) free(gplp.plp[i]);
1068     free(gplp.plp); free(gplp.n_plp); free(gplp.m_plp);
1069     bcf_call_del_rghash(rghash);
1070     bam_mplp_destroy(iter);
1071     sam_hdr_destroy(h);
1072     for (i = 0; i < n; ++i) {
1073         sam_close(data[i]->fp);
1074         if (data[i]->iter) hts_itr_destroy(data[i]->iter);
1075         free(data[i]);
1076     }
1077     free(data); free(plp); free(n_plp);
1078     free(mp_ref.ref[0]);
1079     free(mp_ref.ref[1]);
1080     return ret;
1081 }
1082 
is_url(const char * s)1083 static int is_url(const char *s)
1084 {
1085     static const char uri_scheme_chars[] =
1086         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+.-";
1087     return s[strspn(s, uri_scheme_chars)] == ':';
1088 }
1089 
1090 #define MAX_PATH_LEN 1024
read_file_list(const char * file_list,int * n,char ** argv[])1091 int read_file_list(const char *file_list,int *n,char **argv[])
1092 {
1093     char buf[MAX_PATH_LEN];
1094     int len, nfiles = 0;
1095     char **files = NULL;
1096     struct stat sb;
1097 
1098     *n = 0;
1099     *argv = NULL;
1100 
1101     FILE *fh = fopen(file_list,"r");
1102     if ( !fh )
1103     {
1104         fprintf(samtools_stderr,"%s: %s\n", file_list,strerror(errno));
1105         return 1;
1106     }
1107 
1108     files = calloc(nfiles,sizeof(char*));
1109     nfiles = 0;
1110     while ( fgets(buf,MAX_PATH_LEN,fh) )
1111     {
1112         // allow empty lines and trailing spaces
1113         len = strlen(buf);
1114         while ( len>0 && isspace(buf[len-1]) ) len--;
1115         if ( !len ) continue;
1116 
1117         // check sanity of the file list
1118         buf[len] = 0;
1119         if (! (is_url(buf) || stat(buf, &sb) == 0))
1120         {
1121             // no such file, check if it is safe to print its name
1122             int i, safe_to_print = 1;
1123             for (i=0; i<len; i++)
1124                 if (!isprint(buf[i])) { safe_to_print = 0; break; }
1125             if ( safe_to_print )
1126                 fprintf(samtools_stderr,"The file list \"%s\" appears broken, could not locate: %s\n", file_list,buf);
1127             else
1128                 fprintf(samtools_stderr,"Does the file \"%s\" really contain a list of files and do all exist?\n", file_list);
1129             return 1;
1130         }
1131 
1132         nfiles++;
1133         files = realloc(files,nfiles*sizeof(char*));
1134         files[nfiles-1] = strdup(buf);
1135     }
1136     fclose(fh);
1137     if ( !nfiles )
1138     {
1139         fprintf(samtools_stderr,"No files read from %s\n", file_list);
1140         return 1;
1141     }
1142     *argv = files;
1143     *n    = nfiles;
1144     return 0;
1145 }
1146 #undef MAX_PATH_LEN
1147 
parse_format_flag(const char * str)1148 int parse_format_flag(const char *str)
1149 {
1150     int i, flag = 0, n_tags;
1151     char **tags = hts_readlist(str, 0, &n_tags);
1152     for(i=0; i<n_tags; i++)
1153     {
1154         if ( !strcasecmp(tags[i],"DP") ) flag |= B2B_FMT_DP;
1155         else if ( !strcasecmp(tags[i],"DV") ) { flag |= B2B_FMT_DV; fprintf(samtools_stderr, "[warning] tag DV functional, but deprecated. Please switch to `AD` in future.\n"); }
1156         else if ( !strcasecmp(tags[i],"SP") ) flag |= B2B_FMT_SP;
1157         else if ( !strcasecmp(tags[i],"DP4") ) { flag |= B2B_FMT_DP4; fprintf(samtools_stderr, "[warning] tag DP4 functional, but deprecated. Please switch to `ADF` and `ADR` in future.\n"); }
1158         else if ( !strcasecmp(tags[i],"DPR") ) { flag |= B2B_FMT_DPR; fprintf(samtools_stderr, "[warning] tag DPR functional, but deprecated. Please switch to `AD` in future.\n"); }
1159         else if ( !strcasecmp(tags[i],"INFO/DPR") ) { flag |= B2B_INFO_DPR; fprintf(samtools_stderr, "[warning] tag INFO/DPR functional, but deprecated. Please switch to `INFO/AD` in future.\n"); }
1160         else if ( !strcasecmp(tags[i],"AD") ) flag |= B2B_FMT_AD;
1161         else if ( !strcasecmp(tags[i],"ADF") ) flag |= B2B_FMT_ADF;
1162         else if ( !strcasecmp(tags[i],"ADR") ) flag |= B2B_FMT_ADR;
1163         else if ( !strcasecmp(tags[i],"INFO/AD") ) flag |= B2B_INFO_AD;
1164         else if ( !strcasecmp(tags[i],"INFO/ADF") ) flag |= B2B_INFO_ADF;
1165         else if ( !strcasecmp(tags[i],"INFO/ADR") ) flag |= B2B_INFO_ADR;
1166         else
1167         {
1168             fprintf(samtools_stderr,"Could not parse tag \"%s\" in \"%s\"\n", tags[i], str);
1169             samtools_exit(EXIT_FAILURE);
1170         }
1171         free(tags[i]);
1172     }
1173     if (n_tags) free(tags);
1174     return flag;
1175 }
1176 
print_usage(FILE * fp,const mplp_conf_t * mplp)1177 static void print_usage(FILE *fp, const mplp_conf_t *mplp)
1178 {
1179     char *tmp_require = bam_flag2str(mplp->rflag_require);
1180     char *tmp_filter  = bam_flag2str(mplp->rflag_filter);
1181 
1182     // Display usage information, formatted for the standard 80 columns.
1183     // (The unusual string formatting here aids the readability of this
1184     // source code in 80 columns, to the extent that's possible.)
1185 
1186     fprintf(fp,
1187 "\n"
1188 "Usage: samtools mpileup [options] in1.bam [in2.bam [...]]\n"
1189 "\n"
1190 "Input options:\n"
1191 "  -6, --illumina1.3+      quality is in the Illumina-1.3+ encoding\n"
1192 "  -A, --count-orphans     do not discard anomalous read pairs\n"
1193 "  -b, --bam-list FILE     list of input BAM filenames, one per line\n"
1194 "  -B, --no-BAQ            disable BAQ (per-Base Alignment Quality)\n"
1195 "  -C, --adjust-MQ INT     adjust mapping quality; recommended:50, disable:0 [0]\n"
1196 "  -d, --max-depth INT     max per-file depth; avoids excessive memory usage [%d]\n", mplp->max_depth);
1197     fprintf(fp,
1198 "  -E, --redo-BAQ          recalculate BAQ on the fly, ignore existing BQs\n"
1199 "  -f, --fasta-ref FILE    faidx indexed reference sequence file\n"
1200 "  -G, --exclude-RG FILE   exclude read groups listed in FILE\n"
1201 "  -l, --positions FILE    skip unlisted positions (chr pos) or regions (BED)\n"
1202 "  -q, --min-MQ INT        skip alignments with mapQ smaller than INT [%d]\n", mplp->min_mq);
1203     fprintf(fp,
1204 "  -Q, --min-BQ INT        skip bases with baseQ/BAQ smaller than INT [%d]\n", mplp->min_baseQ);
1205     fprintf(fp,
1206 "  -r, --region REG        region in which pileup is generated\n"
1207 "  -R, --ignore-RG         ignore RG tags (one BAM = one sample)\n"
1208 "  --rf, --incl-flags STR|INT  required flags: include reads with any of the mask bits set [%s]\n", tmp_require);
1209     fprintf(fp,
1210 "  --ff, --excl-flags STR|INT  filter flags: skip reads with any of the mask bits set\n"
1211 "                                            [%s]\n", tmp_filter);
1212     fprintf(fp,
1213 "  -x, --ignore-overlaps   disable read-pair overlap detection\n"
1214 "  -X, --customized-index  use customized index files\n" // -X flag for index filename
1215 "\n"
1216 "Output options:\n"
1217 "  -o, --output FILE        write output to FILE [standard output]\n"
1218 "  -O, --output-BP          output base positions on reads, current orientation\n"
1219 "      --output-BP-5        output base positions on reads, 5' to 3' orientation\n"
1220 "  -M, --output-mods        output base modifications\n"
1221 "  -s, --output-MQ          output mapping quality\n"
1222 "      --output-QNAME       output read names\n"
1223 "      --output-extra STR   output extra read fields and read tag values\n"
1224 "      --output-sep CHAR    set the separator character for tag lists [,]\n"
1225 "      --output-empty CHAR  set the no value character for tag lists [*]\n"
1226 "      --no-output-ins      skip insertion sequence after +NUM\n"
1227 "                           Use twice for complete insertion removal\n"
1228 "      --no-output-ins-mods don't display base modifications within insertions\n"
1229 "      --no-output-del      skip deletion sequence after -NUM\n"
1230 "                           Use twice for complete deletion removal\n"
1231 "      --no-output-ends     remove ^MQUAL and $ markup in sequence column\n"
1232 "      --reverse-del        use '#' character for deletions on the reverse strand\n"
1233 "  -a                       output all positions (including zero depth)\n"
1234 "  -a -a (or -aa)           output absolutely all positions, including unused ref. sequences\n"
1235 "\n"
1236 "Generic options:\n");
1237     sam_global_opt_help(fp, "-.--.--.");
1238 
1239     fprintf(fp, "\n"
1240 "Note that using \"samtools mpileup\" to generate BCF or VCF files is now\n"
1241 "deprecated.  To output these formats, please use \"bcftools mpileup\" instead.\n");
1242 
1243     free(tmp_require);
1244     free(tmp_filter);
1245 }
1246 
deprecated(char opt)1247 static void deprecated(char opt) {
1248     fprintf(samtools_stderr, "[warning] samtools mpileup option `%c` is functional, "
1249             "but deprecated. Please switch to using bcftools mpileup in future.\n", opt);
1250 }
1251 
bam_mpileup(int argc,char * argv[])1252 int bam_mpileup(int argc, char *argv[])
1253 {
1254     int c;
1255     const char *file_list = NULL;
1256     char **fn = NULL;
1257     int nfiles = 0, use_orphan = 0, has_index_file = 0;
1258     mplp_conf_t mplp;
1259     memset(&mplp, 0, sizeof(mplp_conf_t));
1260     mplp.min_baseQ = 13;
1261     mplp.capQ_thres = 0;
1262     mplp.max_depth = MPLP_MAX_DEPTH;
1263     mplp.max_indel_depth = MPLP_MAX_INDEL_DEPTH;
1264     mplp.openQ = 40; mplp.extQ = 20; mplp.tandemQ = 100;
1265     mplp.min_frac = 0.002; mplp.min_support = 1;
1266     mplp.flag = MPLP_NO_ORPHAN | MPLP_REALN | MPLP_SMART_OVERLAPS;
1267     mplp.argc = argc; mplp.argv = argv;
1268     mplp.rflag_filter = BAM_FUNMAP | BAM_FSECONDARY | BAM_FQCFAIL | BAM_FDUP;
1269     mplp.output_fname = NULL;
1270     mplp.all = 0;
1271     mplp.rev_del = 0;
1272     mplp.sep = ',';
1273     mplp.empty = '*';
1274     sam_global_args_init(&mplp.ga);
1275 
1276     static const struct option lopts[] =
1277     {
1278         SAM_OPT_GLOBAL_OPTIONS('-', 0, '-', '-', 0, '-'),
1279         {"rf", required_argument, NULL, 1},   // require flag
1280         {"ff", required_argument, NULL, 2},   // filter flag
1281         {"incl-flags", required_argument, NULL, 1},
1282         {"excl-flags", required_argument, NULL, 2},
1283         {"output", required_argument, NULL, 3},
1284         {"open-prob", required_argument, NULL, 4},
1285         {"output-QNAME", no_argument, NULL, 5},
1286         {"output-qname", no_argument, NULL, 5},
1287         {"illumina1.3+", no_argument, NULL, '6'},
1288         {"count-orphans", no_argument, NULL, 'A'},
1289         {"bam-list", required_argument, NULL, 'b'},
1290         {"no-BAQ", no_argument, NULL, 'B'},
1291         {"no-baq", no_argument, NULL, 'B'},
1292         {"adjust-MQ", required_argument, NULL, 'C'},
1293         {"adjust-mq", required_argument, NULL, 'C'},
1294         {"max-depth", required_argument, NULL, 'd'},
1295         {"redo-BAQ", no_argument, NULL, 'E'},
1296         {"redo-baq", no_argument, NULL, 'E'},
1297         {"fasta-ref", required_argument, NULL, 'f'},
1298         {"exclude-RG", required_argument, NULL, 'G'},
1299         {"exclude-rg", required_argument, NULL, 'G'},
1300         {"positions", required_argument, NULL, 'l'},
1301         {"region", required_argument, NULL, 'r'},
1302         {"ignore-RG", no_argument, NULL, 'R'},
1303         {"ignore-rg", no_argument, NULL, 'R'},
1304         {"min-MQ", required_argument, NULL, 'q'},
1305         {"min-mq", required_argument, NULL, 'q'},
1306         {"min-BQ", required_argument, NULL, 'Q'},
1307         {"min-bq", required_argument, NULL, 'Q'},
1308         {"ignore-overlaps", no_argument, NULL, 'x'},
1309         {"BCF", no_argument, NULL, 'g'},
1310         {"bcf", no_argument, NULL, 'g'},
1311         {"VCF", no_argument, NULL, 'v'},
1312         {"vcf", no_argument, NULL, 'v'},
1313         {"output-mods", no_argument, NULL, 'M'},
1314         {"output-BP", no_argument, NULL, 'O'},
1315         {"output-bp", no_argument, NULL, 'O'},
1316         {"output-BP-5", no_argument, NULL, 14},
1317         {"output-bp-5", no_argument, NULL, 14},
1318         {"output-MQ", no_argument, NULL, 's'},
1319         {"output-mq", no_argument, NULL, 's'},
1320         {"output-tags", required_argument, NULL, 't'},
1321         {"uncompressed", no_argument, NULL, 'u'},
1322         {"ext-prob", required_argument, NULL, 'e'},
1323         {"gap-frac", required_argument, NULL, 'F'},
1324         {"tandem-qual", required_argument, NULL, 'h'},
1325         {"skip-indels", no_argument, NULL, 'I'},
1326         {"max-idepth", required_argument, NULL, 'L'},
1327         {"min-ireads ", required_argument, NULL, 'm'},
1328         {"per-sample-mF", no_argument, NULL, 'p'},
1329         {"per-sample-mf", no_argument, NULL, 'p'},
1330         {"platforms", required_argument, NULL, 'P'},
1331         {"customized-index", no_argument, NULL, 'X'},
1332         {"reverse-del", no_argument, NULL, 6},
1333         {"output-extra", required_argument, NULL, 7},
1334         {"output-sep", required_argument, NULL, 8},
1335         {"output-empty", required_argument, NULL, 9},
1336         {"no-output-ins", no_argument, NULL, 10},
1337         {"no-output-ins-mods", no_argument, NULL, 11},
1338         {"no-output-del", no_argument, NULL, 12},
1339         {"no-output-ends", no_argument, NULL, 13},
1340         {NULL, 0, NULL, 0}
1341     };
1342 
1343     while ((c = getopt_long(argc, argv, "Agf:r:l:q:Q:uRC:BDSd:L:b:P:po:e:h:Im:F:EG:6OsVvxXt:aM",lopts,NULL)) >= 0) {
1344         switch (c) {
1345         case 'x': mplp.flag &= ~MPLP_SMART_OVERLAPS; break;
1346         case  1 :
1347             mplp.rflag_require = bam_str2flag(optarg);
1348             if ( mplp.rflag_require<0 ) { fprintf(samtools_stderr,"Could not parse --rf %s\n", optarg); return 1; }
1349             break;
1350         case  2 :
1351             mplp.rflag_filter = bam_str2flag(optarg);
1352             if ( mplp.rflag_filter<0 ) { fprintf(samtools_stderr,"Could not parse --ff %s\n", optarg); return 1; }
1353             break;
1354         case  3 : mplp.output_fname = optarg; break;
1355         case  4 : mplp.openQ = atoi(optarg); break;
1356         case  5 : mplp.flag |= MPLP_PRINT_QNAME; break;
1357         case  6 : mplp.rev_del = 1; break;
1358         case  7 :
1359             if (build_auxlist(&mplp, optarg) != 0) {
1360                 fprintf(samtools_stderr,"Could not build aux list using '%s'\n", optarg);
1361                 return 1;
1362             }
1363             break;
1364         case 8: mplp.sep = optarg[0]; break;
1365         case 9: mplp.empty = optarg[0]; break;
1366         case 10: mplp.no_ins++; break;
1367         case 11: mplp.no_ins_mods = 1; break;
1368         case 12: mplp.no_del++; break;
1369         case 13: mplp.no_ends = 1; break;
1370         case 'f':
1371             mplp.fai = fai_load(optarg);
1372             if (mplp.fai == NULL) return 1;
1373             mplp.fai_fname = optarg;
1374             break;
1375         case 'd': mplp.max_depth = atoi(optarg); break;
1376         case 'r': mplp.reg = strdup(optarg); break;
1377         case 'l':
1378                   // In the original version the whole BAM was streamed which is inefficient
1379                   //  with few BED intervals and big BAMs. Todo: devise a heuristic to determine
1380                   //  best strategy, that is streaming or jumping.
1381                   mplp.bed = bed_read(optarg);
1382                   if (!mplp.bed) { print_error_errno("mpileup", "Could not read file \"%s\"", optarg); return 1; }
1383                   break;
1384         case 'P': mplp.pl_list = strdup(optarg); deprecated(c); break;
1385         case 'p': mplp.flag |= MPLP_PER_SAMPLE; deprecated(c); break;
1386         case 'g': mplp.flag |= MPLP_BCF; deprecated(c); break;
1387         case 'v': mplp.flag |= MPLP_BCF | MPLP_VCF; deprecated(c); break;
1388         case 'u': mplp.flag |= MPLP_NO_COMP | MPLP_BCF; deprecated(c); break;
1389         case 'B': mplp.flag &= ~MPLP_REALN; break;
1390         case 'X': has_index_file = 1; break;
1391         case 'D': mplp.fmt_flag |= B2B_FMT_DP; deprecated(c); break;
1392         case 'S': mplp.fmt_flag |= B2B_FMT_SP; deprecated(c); break;
1393         case 'V': mplp.fmt_flag |= B2B_FMT_DV; deprecated(c); break;
1394         case 'I': mplp.flag |= MPLP_NO_INDEL; deprecated(c); break;
1395         case 'E': mplp.flag |= MPLP_REDO_BAQ; break;
1396         case '6': mplp.flag |= MPLP_ILLUMINA13; break;
1397         case 'R': mplp.flag |= MPLP_IGNORE_RG; break;
1398         case 's': mplp.flag |= MPLP_PRINT_MAPQ_CHAR; break;
1399         case 'O':
1400             if (!(mplp.flag & MPLP_PRINT_QPOS5))
1401                 mplp.flag |= MPLP_PRINT_QPOS;
1402             break;
1403         case  14:
1404             mplp.flag |=  MPLP_PRINT_QPOS5;
1405             mplp.flag &= ~MPLP_PRINT_QPOS;
1406             break;
1407         case 'M': mplp.flag |= MPLP_PRINT_MODS; break;
1408         case 'C': mplp.capQ_thres = atoi(optarg); break;
1409         case 'q': mplp.min_mq = atoi(optarg); break;
1410         case 'Q': mplp.min_baseQ = atoi(optarg); break;
1411         case 'b': file_list = optarg; break;
1412         case 'o': {
1413                 char *end;
1414                 long value = strtol(optarg, &end, 10);
1415                 // Distinguish between -o INT and -o FILE (a bit of a hack!)
1416                 if (*end == '\0') {
1417                     mplp.openQ = value;
1418                     fprintf(samtools_stderr, "[warning] samtools mpileup option "
1419                             "'--open-prob INT' is functional, but deprecated. "
1420                             "Please switch to using bcftools mpileup in future.\n");
1421                 } else {
1422                     mplp.output_fname = optarg;
1423                 }
1424             }
1425             break;
1426         case 'e': mplp.extQ = atoi(optarg); deprecated(c); break;
1427         case 'h': mplp.tandemQ = atoi(optarg); deprecated(c); break;
1428         case 'A': use_orphan = 1; break;
1429         case 'F': mplp.min_frac = atof(optarg); deprecated(c); break;
1430         case 'm': mplp.min_support = atoi(optarg); deprecated(c); break;
1431         case 'L': mplp.max_indel_depth = atoi(optarg); deprecated(c); break;
1432         case 'G': {
1433                 FILE *fp_rg;
1434                 char buf[1024];
1435                 mplp.rghash = khash_str2int_init();
1436                 if ((fp_rg = fopen(optarg, "r")) == NULL)
1437                     fprintf(samtools_stderr, "[%s] Fail to open file %s. Continue anyway.\n", __func__, optarg);
1438                 while (!feof(fp_rg) && fscanf(fp_rg, "%s", buf) > 0) // this is not a good style, but forgive me...
1439                     khash_str2int_inc(mplp.rghash, strdup(buf));
1440                 fclose(fp_rg);
1441             }
1442             break;
1443         case 't': mplp.fmt_flag |= parse_format_flag(optarg); deprecated(c); break;
1444         case 'a': mplp.all++; break;
1445         default:
1446             if (parse_sam_global_opt(c, optarg, lopts, &mplp.ga) == 0) break;
1447             /* else fall-through */
1448         case '?':
1449             print_usage(samtools_stderr, &mplp);
1450             return 1;
1451         }
1452     }
1453     if (!mplp.fai && mplp.ga.reference) {
1454         mplp.fai_fname = mplp.ga.reference;
1455         mplp.fai = fai_load(mplp.fai_fname);
1456         if (mplp.fai == NULL) return 1;
1457     }
1458 
1459     if ( !(mplp.flag&MPLP_REALN) && mplp.flag&MPLP_REDO_BAQ )
1460     {
1461         fprintf(samtools_stderr,"Error: The -B option cannot be combined with -E\n");
1462         return 1;
1463     }
1464     if (use_orphan) mplp.flag &= ~MPLP_NO_ORPHAN;
1465     if (argc == 1)
1466     {
1467         print_usage(samtools_stderr, &mplp);
1468         return 1;
1469     }
1470     int ret;
1471     if (file_list) {
1472         if (has_index_file) {
1473             fprintf(samtools_stderr,"Error: The -b option cannot be combined with -X\n"); // No customize index loc in file list mode
1474             return 1;
1475         }
1476         if ( read_file_list(file_list,&nfiles,&fn) ) return 1;
1477         ret = mpileup(&mplp,nfiles,fn,NULL);
1478         for (c=0; c<nfiles; c++) free(fn[c]);
1479         free(fn);
1480     }
1481     else {
1482         if (has_index_file) {
1483             if ((argc - optind)%2 !=0) { // Calculate # of input BAM files
1484                 fprintf(samtools_stderr, "Odd number of filenames detected! Each BAM file should have an index file\n");
1485                 return 1;
1486             }
1487             nfiles = (argc - optind)/2;
1488             ret = mpileup(&mplp, nfiles, argv + optind, argv + nfiles + optind);
1489         } else {
1490             nfiles = argc - optind;
1491             ret = mpileup(&mplp, nfiles, argv + optind, NULL);
1492         }
1493     }
1494     if (mplp.rghash) khash_str2int_destroy_free(mplp.rghash);
1495     free(mplp.reg); free(mplp.pl_list);
1496     if (mplp.fai) fai_destroy(mplp.fai);
1497     if (mplp.bed) bed_destroy(mplp.bed);
1498     if (mplp.auxlist) kl_destroy(auxlist, (klist_t(auxlist) *)mplp.auxlist);
1499     return ret;
1500 }
1501