1 #include "samtools.pysam.h"
2
3 /* bam_plcmd.c -- mpileup subcommand.
4
5 Copyright (C) 2008-2015, 2019-2021 Genome Research Ltd.
6 Portions copyright (C) 2009-2012 Broad Institute.
7
8 Author: Heng Li <lh3@sanger.ac.uk>
9
10 Permission is hereby granted, free of charge, to any person obtaining a copy
11 of this software and associated documentation files (the "Software"), to deal
12 in the Software without restriction, including without limitation the rights
13 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14 copies of the Software, and to permit persons to whom the Software is
15 furnished to do so, subject to the following conditions:
16
17 The above copyright notice and this permission notice shall be included in
18 all copies or substantial portions of the Software.
19
20 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
23 THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
25 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
26 DEALINGS IN THE SOFTWARE. */
27
28 #include <config.h>
29
30 #include <math.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <unistd.h>
34 #include <ctype.h>
35 #include <string.h>
36 #include <strings.h>
37 #include <limits.h>
38 #include <errno.h>
39 #include <sys/stat.h>
40 #include <getopt.h>
41 #include <inttypes.h>
42 #include <htslib/sam.h>
43 #include <htslib/faidx.h>
44 #include <htslib/kstring.h>
45 #include <htslib/klist.h>
46 #include <htslib/khash_str2int.h>
47 #include <htslib/cram.h>
48 #include "samtools.h"
49 #include "bedidx.h"
50 #include "sam_opts.h"
51
52 #define dummy_free(p)
KLIST_INIT(auxlist,char *,dummy_free)53 KLIST_INIT(auxlist, char *, dummy_free)
54
55 static inline int printw(int c, FILE *fp)
56 {
57 char buf[16];
58 int l, x;
59 if (c == 0) return fputc('0', fp);
60 for (l = 0, x = c < 0? -c : c; x > 0; x /= 10) buf[l++] = x%10 + '0';
61 if (c < 0) buf[l++] = '-';
62 buf[l] = 0;
63 for (x = 0; x < l/2; ++x) {
64 int y = buf[x]; buf[x] = buf[l-1-x]; buf[l-1-x] = y;
65 }
66 fputs(buf, fp);
67 return 0;
68 }
69
pileup_seq(FILE * fp,const bam_pileup1_t * p,hts_pos_t pos,hts_pos_t ref_len,const char * ref,kstring_t * ks,int rev_del,int no_ins,int no_ins_mods,int no_del,int no_ends)70 static inline int pileup_seq(FILE *fp, const bam_pileup1_t *p, hts_pos_t pos,
71 hts_pos_t ref_len, const char *ref, kstring_t *ks,
72 int rev_del, int no_ins, int no_ins_mods,
73 int no_del, int no_ends)
74 {
75 no_ins_mods |= no_ins;
76 int j;
77 hts_base_mod_state *m = p->cd.p;
78 if (!no_ends && p->is_head) {
79 putc('^', fp);
80 putc(p->b->core.qual > 93? 126 : p->b->core.qual + 33, fp);
81 }
82 if (!p->is_del) {
83 int c = p->qpos < p->b->core.l_qseq
84 ? seq_nt16_str[bam_seqi(bam_get_seq(p->b), p->qpos)]
85 : 'N';
86 if (ref) {
87 int rb = pos < ref_len? ref[pos] : 'N';
88 if (c == '=' || seq_nt16_table[c] == seq_nt16_table[rb]) c = bam_is_rev(p->b)? ',' : '.';
89 else c = bam_is_rev(p->b)? tolower(c) : toupper(c);
90 } else {
91 if (c == '=') c = bam_is_rev(p->b)? ',' : '.';
92 else c = bam_is_rev(p->b)? tolower(c) : toupper(c);
93 }
94 putc(c, fp);
95 if (m) {
96 int nm;
97 hts_base_mod mod[256];
98 if ((nm = bam_mods_at_qpos(p->b, p->qpos, m, mod, 256)) > 0) {
99 putc('[', fp);
100 int j;
101 for (j = 0; j < nm && j < 256; j++) {
102 char qual[20];
103 if (mod[j].qual >= 0)
104 sprintf(qual, "%d", mod[j].qual);
105 else
106 *qual = 0;
107 if (mod[j].modified_base < 0)
108 // ChEBI
109 fprintf(fp, "%c(%d)%s", "+-"[mod[j].strand],
110 -mod[j].modified_base, qual);
111 else
112 fprintf(fp, "%c%c%s", "+-"[mod[j].strand],
113 mod[j].modified_base, qual);
114 }
115 putc(']', fp);
116 }
117 }
118 } else putc(p->is_refskip? (bam_is_rev(p->b)? '<' : '>') : ((bam_is_rev(p->b) && rev_del) ? '#' : '*'), fp);
119 int del_len = -p->indel;
120 if (p->indel > 0) {
121 int len = bam_plp_insertion_mod(p, m && !no_ins_mods ? m : NULL,
122 ks, &del_len);
123 if (len < 0) {
124 print_error("mpileup", "bam_plp_insertion() failed");
125 return -1;
126 }
127 if (no_ins < 2) {
128 putc('+', fp);
129 printw(len, fp);
130 }
131 if (!no_ins) {
132 if (bam_is_rev(p->b)) {
133 char pad = rev_del ? '#' : '*';
134 int in_mod = 0;
135 for (j = 0; j < ks->l; j++) {
136 if (ks->s[j] == '[') in_mod = 1;
137 else if (ks->s[j] == ']') in_mod = 0;
138 putc(ks->s[j] != '*'
139 ? (in_mod ? ks->s[j] : tolower(ks->s[j]))
140 : pad, fp);
141 }
142 } else {
143 int in_mod = 0;
144 for (j = 0; j < ks->l; j++) {
145 if (ks->s[j] == '[') in_mod = 1;
146 if (ks->s[j] == ']') in_mod = 0;
147 putc(in_mod ? ks->s[j] : toupper(ks->s[j]), fp);
148 }
149 }
150 }
151 }
152 if (del_len > 0) {
153 if (no_del < 2)
154 printw(-del_len, fp);
155 if (!no_del) {
156 for (j = 1; j <= del_len; ++j) {
157 int c = (ref && (int)pos+j < ref_len)? ref[pos+j] : 'N';
158 putc(bam_is_rev(p->b)? tolower(c) : toupper(c), fp);
159 }
160 }
161 }
162 if (!no_ends && p->is_tail) putc('$', fp);
163 return 0;
164 }
165
166 #include <assert.h>
167 #include "bam2bcf.h"
168 #include "sample.h"
169
170 #define MPLP_BCF 1
171 #define MPLP_VCF (1<<1)
172 #define MPLP_NO_COMP (1<<2)
173 #define MPLP_NO_ORPHAN (1<<3)
174 #define MPLP_REALN (1<<4)
175 #define MPLP_NO_INDEL (1<<5)
176 #define MPLP_REDO_BAQ (1<<6)
177 #define MPLP_ILLUMINA13 (1<<7)
178 #define MPLP_IGNORE_RG (1<<8)
179 #define MPLP_PER_SAMPLE (1<<9)
180 #define MPLP_SMART_OVERLAPS (1<<10)
181
182 #define MPLP_PRINT_MAPQ_CHAR (1<<11)
183 #define MPLP_PRINT_QPOS (1<<12)
184 #define MPLP_PRINT_QNAME (1<<13)
185 #define MPLP_PRINT_FLAG (1<<14)
186 #define MPLP_PRINT_RNAME (1<<15)
187 #define MPLP_PRINT_POS (1<<16)
188 #define MPLP_PRINT_MAPQ (1<<17)
189 #define MPLP_PRINT_CIGAR (1<<18)
190 #define MPLP_PRINT_RNEXT (1<<19)
191 #define MPLP_PRINT_PNEXT (1<<20)
192 #define MPLP_PRINT_TLEN (1<<21)
193 #define MPLP_PRINT_SEQ (1<<22)
194 #define MPLP_PRINT_QUAL (1<<23)
195 #define MPLP_PRINT_MODS (1<<24)
196 #define MPLP_PRINT_QPOS5 (1<<25)
197
198 #define MPLP_PRINT_LAST (1<<26) // terminator for loop
199
200 #define MPLP_MAX_DEPTH 8000
201 #define MPLP_MAX_INDEL_DEPTH 250
202
203 typedef struct {
204 int min_mq, flag, min_baseQ, capQ_thres, max_depth, max_indel_depth, fmt_flag, all, rev_del;
205 int rflag_require, rflag_filter;
206 int openQ, extQ, tandemQ, min_support; // for indels
207 double min_frac; // for indels
208 char *reg, *pl_list, *fai_fname, *output_fname;
209 faidx_t *fai;
210 void *bed, *rghash, *auxlist;
211 int argc;
212 char **argv;
213 char sep, empty, no_ins, no_ins_mods, no_del, no_ends;
214 sam_global_args ga;
215 } mplp_conf_t;
216
217 typedef struct {
218 char *ref[2];
219 int ref_id[2];
220 hts_pos_t ref_len[2];
221 } mplp_ref_t;
222
223 #define MPLP_REF_INIT {{NULL,NULL},{-1,-1},{0,0}}
224
225 typedef struct {
226 samFile *fp;
227 hts_itr_t *iter;
228 sam_hdr_t *h;
229 mplp_ref_t *ref;
230 const mplp_conf_t *conf;
231 } mplp_aux_t;
232
233 typedef struct {
234 int n;
235 int *n_plp, *m_plp;
236 bam_pileup1_t **plp;
237 } mplp_pileup_t;
238
build_auxlist(mplp_conf_t * conf,char * optstring)239 static int build_auxlist(mplp_conf_t *conf, char *optstring) {
240 if (!optstring)
241 return 0;
242
243 void *colhash = khash_str2int_init();
244 if (!colhash)
245 return 1;
246
247 struct active_cols {
248 char *name;
249 int supported;
250 };
251
252 const struct active_cols colnames[11] = {
253 {"QNAME", 1}, {"FLAG", 1}, {"RNAME", 1}, {"POS", 1}, {"MAPQ", 1}, {"CIGAR", 0}, {"RNEXT", 1}, {"PNEXT", 1}, {"TLEN", 0}, {"SEQ", 0}, {"QUAL", 0}
254 };
255
256 int i, f = MPLP_PRINT_QNAME, colno = 11;
257 for (i = 0; i < colno; i++, f <<= 1)
258 if (colnames[i].supported)
259 khash_str2int_set(colhash, colnames[i].name, f);
260
261 conf->auxlist = kl_init(auxlist);
262 if (!conf->auxlist)
263 return 1;
264
265 char *save_p;
266 char *tag = strtok_r(optstring, ",", &save_p);
267 while (tag) {
268 if (khash_str2int_get(colhash, tag, &f) == 0) {
269 conf->flag |= f;
270 } else {
271 if (strlen(tag) != 2) {
272 fprintf(samtools_stderr, "[%s] tag '%s' has more than two characters or not supported\n", __func__, tag);
273 } else {
274 char **tag_p = kl_pushp(auxlist, conf->auxlist);
275 *tag_p = tag;
276 }
277 }
278 tag = strtok_r(NULL, ",", &save_p);
279 }
280
281 khash_str2int_destroy(colhash);
282
283 return 0;
284 }
285
mplp_get_ref(mplp_aux_t * ma,int tid,char ** ref,hts_pos_t * ref_len)286 static int mplp_get_ref(mplp_aux_t *ma, int tid, char **ref, hts_pos_t *ref_len) {
287 mplp_ref_t *r = ma->ref;
288
289 //printf("get ref %d {%d/%p, %d/%p}\n", tid, r->ref_id[0], r->ref[0], r->ref_id[1], r->ref[1]);
290
291 if (!r || !ma->conf->fai) {
292 *ref = NULL;
293 return 0;
294 }
295
296 // Do we need to reference count this so multiple mplp_aux_t can
297 // track which references are in use?
298 // For now we just cache the last two. Sufficient?
299 if (tid == r->ref_id[0]) {
300 *ref = r->ref[0];
301 *ref_len = r->ref_len[0];
302 return 1;
303 }
304 if (tid == r->ref_id[1]) {
305 // Last, swap over
306 int tmp_id;
307 hts_pos_t tmp_len;
308 tmp_id = r->ref_id[0]; r->ref_id[0] = r->ref_id[1]; r->ref_id[1] = tmp_id;
309 tmp_len = r->ref_len[0]; r->ref_len[0] = r->ref_len[1]; r->ref_len[1] = tmp_len;
310
311 char *tc;
312 tc = r->ref[0]; r->ref[0] = r->ref[1]; r->ref[1] = tc;
313 *ref = r->ref[0];
314 *ref_len = r->ref_len[0];
315 return 1;
316 }
317
318 // New, so migrate to old and load new
319 free(r->ref[1]);
320 r->ref[1] = r->ref[0];
321 r->ref_id[1] = r->ref_id[0];
322 r->ref_len[1] = r->ref_len[0];
323
324 r->ref_id[0] = tid;
325 r->ref[0] = faidx_fetch_seq64(ma->conf->fai,
326 sam_hdr_tid2name(ma->h, r->ref_id[0]),
327 0,
328 HTS_POS_MAX,
329 &r->ref_len[0]);
330
331 if (!r->ref[0]) {
332 r->ref[0] = NULL;
333 r->ref_id[0] = -1;
334 r->ref_len[0] = 0;
335 *ref = NULL;
336 return 0;
337 }
338
339 *ref = r->ref[0];
340 *ref_len = r->ref_len[0];
341 return 1;
342 }
343
344 // Initialise and destroy the base modifier state data. This is called
345 // as each new read is added or removed from the pileups.
346 static
pileup_cd_create(void * data,const bam1_t * b,bam_pileup_cd * cd)347 int pileup_cd_create(void *data, const bam1_t *b, bam_pileup_cd *cd) {
348 int ret;
349 hts_base_mod_state *m = hts_base_mod_state_alloc();
350 ret = bam_parse_basemod(b, m);
351 cd->p = m;
352 return ret;
353 }
354
355 static
pileup_cd_destroy(void * data,const bam1_t * b,bam_pileup_cd * cd)356 int pileup_cd_destroy(void *data, const bam1_t *b, bam_pileup_cd *cd) {
357 hts_base_mod_state_free(cd->p);
358 return 0;
359 }
360
361 static void
print_empty_pileup(FILE * fp,const mplp_conf_t * conf,const char * tname,hts_pos_t pos,int n,const char * ref,hts_pos_t ref_len)362 print_empty_pileup(FILE *fp, const mplp_conf_t *conf, const char *tname,
363 hts_pos_t pos, int n, const char *ref, hts_pos_t ref_len)
364 {
365 int i;
366 fprintf(fp, "%s\t%"PRIhts_pos"\t%c", tname, pos+1, (ref && pos < ref_len)? ref[pos] : 'N');
367 for (i = 0; i < n; ++i) {
368 fputs("\t0\t*\t*", fp);
369 int flag_value = MPLP_PRINT_MAPQ_CHAR;
370 while(flag_value < MPLP_PRINT_LAST) {
371 if (flag_value != MPLP_PRINT_MODS && (conf->flag & flag_value))
372 fputs("\t*", fp);
373 flag_value <<= 1;
374 }
375 if (conf->auxlist) {
376 int t = 0;
377 while(t++ < ((klist_t(auxlist) *)conf->auxlist)->size)
378 fputs("\t*", fp);
379 }
380 }
381 putc('\n', fp);
382 }
383
mplp_func(void * data,bam1_t * b)384 static int mplp_func(void *data, bam1_t *b)
385 {
386 char *ref;
387 mplp_aux_t *ma = (mplp_aux_t*)data;
388 int ret, skip = 0;
389 hts_pos_t ref_len;
390
391 do {
392 int has_ref;
393 ret = ma->iter? sam_itr_next(ma->fp, ma->iter, b) : sam_read1(ma->fp, ma->h, b);
394 if (ret < 0) break;
395 // The 'B' cigar operation is not part of the specification, considering as obsolete.
396 // bam_remove_B(b);
397 if (b->core.tid < 0 || (b->core.flag&BAM_FUNMAP)) { // exclude unmapped reads
398 skip = 1;
399 continue;
400 }
401 if (ma->conf->rflag_require && !(ma->conf->rflag_require&b->core.flag)) { skip = 1; continue; }
402 if (ma->conf->rflag_filter && ma->conf->rflag_filter&b->core.flag) { skip = 1; continue; }
403 if (ma->conf->bed && ma->conf->all == 0) { // test overlap
404 skip = !bed_overlap(ma->conf->bed, sam_hdr_tid2name(ma->h, b->core.tid), b->core.pos, bam_endpos(b));
405 if (skip) continue;
406 }
407 if (ma->conf->rghash) { // exclude read groups
408 uint8_t *rg = bam_aux_get(b, "RG");
409 skip = (rg && khash_str2int_get(ma->conf->rghash, (const char*)(rg+1), NULL)==0);
410 if (skip) continue;
411 }
412 if (ma->conf->flag & MPLP_ILLUMINA13) {
413 int i;
414 uint8_t *qual = bam_get_qual(b);
415 for (i = 0; i < b->core.l_qseq; ++i)
416 qual[i] = qual[i] > 31? qual[i] - 31 : 0;
417 }
418
419 if (ma->conf->fai && b->core.tid >= 0) {
420 has_ref = mplp_get_ref(ma, b->core.tid, &ref, &ref_len);
421 if (has_ref && ref_len <= b->core.pos) { // exclude reads outside of the reference sequence
422 fprintf(samtools_stderr,"[%s] Skipping because %"PRIhts_pos" is outside of %"PRIhts_pos" [ref:%d]\n",
423 __func__, (int64_t) b->core.pos, ref_len, b->core.tid);
424 skip = 1;
425 continue;
426 }
427 } else {
428 has_ref = 0;
429 }
430
431 skip = 0;
432 if (has_ref && (ma->conf->flag&MPLP_REALN)) sam_prob_realn(b, ref, ref_len, (ma->conf->flag & MPLP_REDO_BAQ)? 7 : 3);
433 if (has_ref && ma->conf->capQ_thres > 10) {
434 int q = sam_cap_mapq(b, ref, ref_len, ma->conf->capQ_thres);
435 if (q < 0) skip = 1;
436 else if (b->core.qual > q) b->core.qual = q;
437 }
438 if (b->core.qual < ma->conf->min_mq) skip = 1;
439 else if ((ma->conf->flag&MPLP_NO_ORPHAN) && (b->core.flag&BAM_FPAIRED) && !(b->core.flag&BAM_FPROPER_PAIR)) skip = 1;
440 } while (skip);
441 return ret;
442 }
443
group_smpl(mplp_pileup_t * m,bam_sample_t * sm,kstring_t * buf,int n,char * const * fn,int * n_plp,const bam_pileup1_t ** plp,int ignore_rg)444 static void group_smpl(mplp_pileup_t *m, bam_sample_t *sm, kstring_t *buf,
445 int n, char *const*fn, int *n_plp, const bam_pileup1_t **plp, int ignore_rg)
446 {
447 int i, j;
448 memset(m->n_plp, 0, m->n * sizeof(int));
449 for (i = 0; i < n; ++i) {
450 for (j = 0; j < n_plp[i]; ++j) {
451 const bam_pileup1_t *p = plp[i] + j;
452 uint8_t *q;
453 int id = -1;
454 q = ignore_rg? NULL : bam_aux_get(p->b, "RG");
455 if (q) id = bam_smpl_rg2smid(sm, fn[i], (char*)q+1, buf);
456 if (id < 0) id = bam_smpl_rg2smid(sm, fn[i], 0, buf);
457 if (id < 0 || id >= m->n) {
458 assert(q); // otherwise a bug
459 fprintf(samtools_stderr, "[%s] Read group %s used in file %s but absent from the header or an alignment missing read group.\n", __func__, (char*)q+1, fn[i]);
460 samtools_exit(EXIT_FAILURE);
461 }
462 if (m->n_plp[id] == m->m_plp[id]) {
463 m->m_plp[id] = m->m_plp[id]? m->m_plp[id]<<1 : 8;
464 m->plp[id] = realloc(m->plp[id], sizeof(bam_pileup1_t) * m->m_plp[id]);
465 }
466 m->plp[id][m->n_plp[id]++] = *p;
467 }
468 }
469 }
470
471 /*
472 * Performs pileup
473 * @param conf configuration for this pileup
474 * @param n number of files specified in fn
475 * @param fn filenames
476 * @param fn_idx index filenames
477 */
mpileup(mplp_conf_t * conf,int n,char ** fn,char ** fn_idx)478 static int mpileup(mplp_conf_t *conf, int n, char **fn, char **fn_idx)
479 {
480 extern void *bcf_call_add_rg(void *rghash, const char *hdtext, const char *list);
481 extern void bcf_call_del_rghash(void *rghash);
482 mplp_aux_t **data;
483 int i, tid, *n_plp, tid0 = 0, max_depth, max_indel_depth;
484 hts_pos_t pos, beg0 = 0, end0 = HTS_POS_MAX, ref_len;
485 const bam_pileup1_t **plp;
486 mplp_ref_t mp_ref = MPLP_REF_INIT;
487 bam_mplp_t iter;
488 sam_hdr_t *h = NULL; /* header of first file in input list */
489 char *ref;
490 void *rghash = NULL;
491 FILE *pileup_fp = NULL;
492
493 bcf_callaux_t *bca = NULL;
494 bcf_callret1_t *bcr = NULL;
495 bcf_call_t bc;
496 htsFile *bcf_fp = NULL;
497 bcf_hdr_t *bcf_hdr = NULL;
498
499 bam_sample_t *sm = NULL;
500 kstring_t buf;
501 mplp_pileup_t gplp;
502
503 memset(&gplp, 0, sizeof(mplp_pileup_t));
504 memset(&buf, 0, sizeof(kstring_t));
505 memset(&bc, 0, sizeof(bcf_call_t));
506 data = calloc(n, sizeof(mplp_aux_t*));
507 plp = calloc(n, sizeof(bam_pileup1_t*));
508 n_plp = calloc(n, sizeof(int));
509 sm = bam_smpl_init();
510
511 if (n == 0) {
512 fprintf(samtools_stderr,"[%s] no input file/data given\n", __func__);
513 samtools_exit(EXIT_FAILURE);
514 }
515
516 // read the header of each file in the list and initialize data
517 refs_t *refs = NULL;
518 for (i = 0; i < n; ++i) {
519 sam_hdr_t *h_tmp;
520 data[i] = calloc(1, sizeof(mplp_aux_t));
521 data[i]->fp = sam_open_format(fn[i], "rb", &conf->ga.in);
522 if ( !data[i]->fp )
523 {
524 fprintf(samtools_stderr, "[%s] failed to open %s: %s\n", __func__, fn[i], strerror(errno));
525 samtools_exit(EXIT_FAILURE);
526 }
527 if (hts_set_opt(data[i]->fp, CRAM_OPT_DECODE_MD, 0)) {
528 fprintf(samtools_stderr, "Failed to set CRAM_OPT_DECODE_MD value\n");
529 samtools_exit(EXIT_FAILURE);
530 }
531
532 if (!refs && conf->fai_fname) {
533 if (hts_set_fai_filename(data[i]->fp, conf->fai_fname) != 0) {
534 fprintf(samtools_stderr, "[%s] failed to process %s: %s\n",
535 __func__, conf->fai_fname, strerror(errno));
536 samtools_exit(EXIT_FAILURE);
537 }
538 refs = cram_get_refs(data[i]->fp);
539 } else if (conf->fai_fname) {
540 if (hts_set_opt(data[i]->fp, CRAM_OPT_SHARED_REF, refs) != 0) {
541 fprintf(samtools_stderr, "[%s] failed to process %s: %s\n",
542 __func__, conf->fai_fname, strerror(errno));
543 samtools_exit(EXIT_FAILURE);
544 }
545 }
546
547 data[i]->conf = conf;
548 data[i]->ref = &mp_ref;
549 h_tmp = sam_hdr_read(data[i]->fp);
550 if ( !h_tmp ) {
551 fprintf(samtools_stderr,"[%s] fail to read the header of %s\n", __func__, fn[i]);
552 samtools_exit(EXIT_FAILURE);
553 }
554 bam_smpl_add(sm, fn[i], (conf->flag&MPLP_IGNORE_RG)? 0 : sam_hdr_str(h_tmp));
555 if (conf->flag & MPLP_BCF) {
556 // Collect read group IDs with PL (platform) listed in pl_list (note: fragile, strstr search)
557 rghash = bcf_call_add_rg(rghash, sam_hdr_str(h_tmp), conf->pl_list);
558 }
559 if (conf->reg) {
560 hts_idx_t *idx = NULL;
561 // If index filename has not been specfied, look in BAM folder
562 if (fn_idx != NULL) {
563 idx = sam_index_load2(data[i]->fp, fn[i], fn_idx[i]);
564 } else {
565 idx = sam_index_load(data[i]->fp, fn[i]);
566 }
567
568 if (idx == NULL) {
569 fprintf(samtools_stderr, "[%s] fail to load index for %s\n", __func__, fn[i]);
570 samtools_exit(EXIT_FAILURE);
571 }
572 if ( (data[i]->iter=sam_itr_querys(idx, h_tmp, conf->reg)) == 0) {
573 fprintf(samtools_stderr, "[E::%s] fail to parse region '%s' with %s\n", __func__, conf->reg, fn[i]);
574 samtools_exit(EXIT_FAILURE);
575 }
576 if (i == 0) beg0 = data[i]->iter->beg, end0 = data[i]->iter->end, tid0 = data[i]->iter->tid;
577 hts_idx_destroy(idx);
578 }
579 else
580 data[i]->iter = NULL;
581
582 if (i == 0) h = data[i]->h = h_tmp; // save the header of the first file
583 else {
584 // FIXME: check consistency between h and h_tmp
585 sam_hdr_destroy(h_tmp);
586
587 // we store only the first file's header; it's (alleged to be)
588 // compatible with the i-th file's target_name lookup needs
589 data[i]->h = h;
590 }
591 }
592 fprintf(samtools_stderr, "[%s] %d samples in %d input files\n", __func__, sm->n, n);
593 if (conf->flag & MPLP_BCF)
594 {
595 const char *mode;
596 // allocate data storage proportionate to number of samples being studied sm->n
597 gplp.n = sm->n;
598 gplp.n_plp = calloc(sm->n, sizeof(int));
599 gplp.m_plp = calloc(sm->n, sizeof(int));
600 gplp.plp = calloc(sm->n, sizeof(bam_pileup1_t*));
601
602 // write the VCF header
603
604 if ( conf->flag & MPLP_VCF )
605 mode = (conf->flag&MPLP_NO_COMP)? "wu" : "wz"; // uncompressed VCF or compressed VCF
606 else
607 mode = (conf->flag&MPLP_NO_COMP)? "wub" : "wb"; // uncompressed BCF or compressed BCF
608
609 bcf_fp = bcf_open(conf->output_fname? conf->output_fname : "-", mode);
610 if (bcf_fp == NULL) {
611 fprintf(samtools_stderr, "[%s] failed to write to %s: %s\n", __func__, conf->output_fname? conf->output_fname : "standard output", strerror(errno));
612 samtools_exit(EXIT_FAILURE);
613 }
614 autoflush_if_stdout(bcf_fp, conf->output_fname);
615
616 // BCF header creation
617 bcf_hdr = bcf_hdr_init("w");
618 kstring_t str = {0,0,NULL};
619
620 ksprintf(&str, "##samtoolsVersion=%s+htslib-%s\n",samtools_version(),hts_version());
621 bcf_hdr_append(bcf_hdr, str.s);
622
623 str.l = 0;
624 ksprintf(&str, "##samtoolsCommand=samtools mpileup");
625 for (i=1; i<conf->argc; i++) ksprintf(&str, " %s", conf->argv[i]);
626 kputc('\n', &str);
627 bcf_hdr_append(bcf_hdr, str.s);
628
629 if (conf->fai_fname)
630 {
631 str.l = 0;
632 ksprintf(&str, "##reference=file://%s\n", conf->fai_fname);
633 bcf_hdr_append(bcf_hdr, str.s);
634 }
635
636 // Translate BAM @SQ tags to BCF ##contig tags
637 // todo: use/write new BAM header manipulation routines, fill also UR, M5
638 for (i=0; i < sam_hdr_nref(h); i++)
639 {
640 str.l = 0;
641 ksprintf(&str, "##contig=<ID=%s,length=%"PRId64">", sam_hdr_tid2name(h, i), (int64_t) sam_hdr_tid2len(h, i));
642 bcf_hdr_append(bcf_hdr, str.s);
643 }
644 free(str.s);
645 bcf_hdr_append(bcf_hdr,"##ALT=<ID=*,Description=\"Represents allele(s) other than observed.\">");
646 bcf_hdr_append(bcf_hdr,"##INFO=<ID=INDEL,Number=0,Type=Flag,Description=\"Indicates that the variant is an INDEL.\">");
647 bcf_hdr_append(bcf_hdr,"##INFO=<ID=IDV,Number=1,Type=Integer,Description=\"Maximum number of reads supporting an indel\">");
648 bcf_hdr_append(bcf_hdr,"##INFO=<ID=IMF,Number=1,Type=Float,Description=\"Maximum fraction of reads supporting an indel\">");
649 bcf_hdr_append(bcf_hdr,"##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Raw read depth\">");
650 bcf_hdr_append(bcf_hdr,"##INFO=<ID=VDB,Number=1,Type=Float,Description=\"Variant Distance Bias for filtering splice-site artefacts in RNA-seq data (bigger is better)\",Version=\"3\">");
651 bcf_hdr_append(bcf_hdr,"##INFO=<ID=RPB,Number=1,Type=Float,Description=\"Mann-Whitney U test of Read Position Bias (bigger is better)\">");
652 bcf_hdr_append(bcf_hdr,"##INFO=<ID=MQB,Number=1,Type=Float,Description=\"Mann-Whitney U test of Mapping Quality Bias (bigger is better)\">");
653 bcf_hdr_append(bcf_hdr,"##INFO=<ID=BQB,Number=1,Type=Float,Description=\"Mann-Whitney U test of Base Quality Bias (bigger is better)\">");
654 bcf_hdr_append(bcf_hdr,"##INFO=<ID=MQSB,Number=1,Type=Float,Description=\"Mann-Whitney U test of Mapping Quality vs Strand Bias (bigger is better)\">");
655 #if CDF_MWU_TESTS
656 bcf_hdr_append(bcf_hdr,"##INFO=<ID=RPB2,Number=1,Type=Float,Description=\"Mann-Whitney U test of Read Position Bias [CDF] (bigger is better)\">");
657 bcf_hdr_append(bcf_hdr,"##INFO=<ID=MQB2,Number=1,Type=Float,Description=\"Mann-Whitney U test of Mapping Quality Bias [CDF] (bigger is better)\">");
658 bcf_hdr_append(bcf_hdr,"##INFO=<ID=BQB2,Number=1,Type=Float,Description=\"Mann-Whitney U test of Base Quality Bias [CDF] (bigger is better)\">");
659 bcf_hdr_append(bcf_hdr,"##INFO=<ID=MQSB2,Number=1,Type=Float,Description=\"Mann-Whitney U test of Mapping Quality vs Strand Bias [CDF] (bigger is better)\">");
660 #endif
661 bcf_hdr_append(bcf_hdr,"##INFO=<ID=SGB,Number=1,Type=Float,Description=\"Segregation based metric.\">");
662 bcf_hdr_append(bcf_hdr,"##INFO=<ID=MQ0F,Number=1,Type=Float,Description=\"Fraction of MQ0 reads (smaller is better)\">");
663 bcf_hdr_append(bcf_hdr,"##INFO=<ID=I16,Number=16,Type=Float,Description=\"Auxiliary tag used for calling, see description of bcf_callret1_t in bam2bcf.h\">");
664 bcf_hdr_append(bcf_hdr,"##INFO=<ID=QS,Number=R,Type=Float,Description=\"Auxiliary tag used for calling\">");
665 bcf_hdr_append(bcf_hdr,"##FORMAT=<ID=PL,Number=G,Type=Integer,Description=\"List of Phred-scaled genotype likelihoods\">");
666 if ( conf->fmt_flag&B2B_FMT_DP )
667 bcf_hdr_append(bcf_hdr,"##FORMAT=<ID=DP,Number=1,Type=Integer,Description=\"Number of high-quality bases\">");
668 if ( conf->fmt_flag&B2B_FMT_DV )
669 bcf_hdr_append(bcf_hdr,"##FORMAT=<ID=DV,Number=1,Type=Integer,Description=\"Number of high-quality non-reference bases\">");
670 if ( conf->fmt_flag&B2B_FMT_DPR )
671 bcf_hdr_append(bcf_hdr,"##FORMAT=<ID=DPR,Number=R,Type=Integer,Description=\"Number of high-quality bases observed for each allele\">");
672 if ( conf->fmt_flag&B2B_INFO_DPR )
673 bcf_hdr_append(bcf_hdr,"##INFO=<ID=DPR,Number=R,Type=Integer,Description=\"Number of high-quality bases observed for each allele\">");
674 if ( conf->fmt_flag&B2B_FMT_DP4 )
675 bcf_hdr_append(bcf_hdr,"##FORMAT=<ID=DP4,Number=4,Type=Integer,Description=\"Number of high-quality ref-fwd, ref-reverse, alt-fwd and alt-reverse bases\">");
676 if ( conf->fmt_flag&B2B_FMT_SP )
677 bcf_hdr_append(bcf_hdr,"##FORMAT=<ID=SP,Number=1,Type=Integer,Description=\"Phred-scaled strand bias P-value\">");
678 if ( conf->fmt_flag&B2B_FMT_AD )
679 bcf_hdr_append(bcf_hdr,"##FORMAT=<ID=AD,Number=R,Type=Integer,Description=\"Allelic depths\">");
680 if ( conf->fmt_flag&B2B_FMT_ADF )
681 bcf_hdr_append(bcf_hdr,"##FORMAT=<ID=ADF,Number=R,Type=Integer,Description=\"Allelic depths on the forward strand\">");
682 if ( conf->fmt_flag&B2B_FMT_ADR )
683 bcf_hdr_append(bcf_hdr,"##FORMAT=<ID=ADR,Number=R,Type=Integer,Description=\"Allelic depths on the reverse strand\">");
684 if ( conf->fmt_flag&B2B_INFO_AD )
685 bcf_hdr_append(bcf_hdr,"##INFO=<ID=AD,Number=R,Type=Integer,Description=\"Total allelic depths\">");
686 if ( conf->fmt_flag&B2B_INFO_ADF )
687 bcf_hdr_append(bcf_hdr,"##INFO=<ID=ADF,Number=R,Type=Integer,Description=\"Total allelic depths on the forward strand\">");
688 if ( conf->fmt_flag&B2B_INFO_ADR )
689 bcf_hdr_append(bcf_hdr,"##INFO=<ID=ADR,Number=R,Type=Integer,Description=\"Total allelic depths on the reverse strand\">");
690
691 for (i=0; i<sm->n; i++)
692 bcf_hdr_add_sample(bcf_hdr, sm->smpl[i]);
693 bcf_hdr_add_sample(bcf_hdr, NULL);
694 if (bcf_hdr_write(bcf_fp, bcf_hdr) != 0) {
695 print_error_errno("mpileup", "Failed to write VCF/BCF header to \"%s\"",
696 conf->output_fname? conf->output_fname : "standard output");
697 samtools_exit(EXIT_FAILURE);
698 }
699 // End of BCF header creation
700
701 // Initialise the calling algorithm
702 bca = bcf_call_init(-1., conf->min_baseQ);
703 bcr = calloc(sm->n, sizeof(bcf_callret1_t));
704 bca->rghash = rghash;
705 bca->openQ = conf->openQ, bca->extQ = conf->extQ, bca->tandemQ = conf->tandemQ;
706 bca->min_frac = conf->min_frac;
707 bca->min_support = conf->min_support;
708 bca->per_sample_flt = conf->flag & MPLP_PER_SAMPLE;
709
710 bc.bcf_hdr = bcf_hdr;
711 bc.n = sm->n;
712 bc.PL = malloc(15 * sm->n * sizeof(*bc.PL));
713 if (conf->fmt_flag)
714 {
715 assert( sizeof(float)==sizeof(int32_t) );
716 bc.DP4 = malloc(sm->n * sizeof(int32_t) * 4);
717 bc.fmt_arr = malloc(sm->n * sizeof(float)); // all fmt_flag fields
718 if ( conf->fmt_flag&(B2B_INFO_DPR|B2B_FMT_DPR|B2B_INFO_AD|B2B_INFO_ADF|B2B_INFO_ADR|B2B_FMT_AD|B2B_FMT_ADF|B2B_FMT_ADR) )
719 {
720 // first B2B_MAX_ALLELES fields for total numbers, the rest per-sample
721 bc.ADR = (int32_t*) malloc((sm->n+1)*B2B_MAX_ALLELES*sizeof(int32_t));
722 bc.ADF = (int32_t*) malloc((sm->n+1)*B2B_MAX_ALLELES*sizeof(int32_t));
723 for (i=0; i<sm->n; i++)
724 {
725 bcr[i].ADR = bc.ADR + (i+1)*B2B_MAX_ALLELES;
726 bcr[i].ADF = bc.ADF + (i+1)*B2B_MAX_ALLELES;
727 }
728 }
729 }
730 }
731 else {
732 pileup_fp = conf->output_fname? fopen(conf->output_fname, "w") : samtools_stdout;
733
734 if (pileup_fp == NULL) {
735 fprintf(samtools_stderr, "[%s] failed to write to %s: %s\n", __func__, conf->output_fname, strerror(errno));
736 samtools_exit(EXIT_FAILURE);
737 }
738 }
739
740 // init pileup
741 iter = bam_mplp_init(n, mplp_func, (void**)data);
742 if (conf->flag & MPLP_PRINT_MODS) {
743 bam_mplp_constructor(iter, pileup_cd_create);
744 bam_mplp_destructor(iter, pileup_cd_destroy);
745 }
746 if ( conf->flag & MPLP_SMART_OVERLAPS ) bam_mplp_init_overlaps(iter);
747 if ( !conf->max_depth ) {
748 max_depth = INT_MAX;
749 fprintf(samtools_stderr, "[%s] Max depth set to maximum value (%d)\n", __func__, INT_MAX);
750 } else {
751 max_depth = conf->max_depth;
752 if ( max_depth * n > 1<<20 )
753 fprintf(samtools_stderr, "[%s] Combined max depth is above 1M. Potential memory hog!\n", __func__);
754 }
755
756 // Only used when writing BCF
757 max_indel_depth = conf->max_indel_depth * sm->n;
758 bam_mplp_set_maxcnt(iter, max_depth);
759 bcf1_t *bcf_rec = bcf_init1();
760 int ret;
761 int last_tid = -1;
762 hts_pos_t last_pos = -1;
763
764 // begin pileup
765 while ( (ret=bam_mplp64_auto(iter, &tid, &pos, n_plp, plp)) > 0) {
766 if (conf->reg && (pos < beg0 || pos >= end0)) continue; // out of the region requested
767 mplp_get_ref(data[0], tid, &ref, &ref_len);
768 //printf("tid=%d len=%d ref=%p/%s\n", tid, ref_len, ref, ref);
769 if (conf->flag & MPLP_BCF) {
770 int total_depth, _ref0, ref16;
771 if (conf->bed && tid >= 0 && !bed_overlap(conf->bed, sam_hdr_tid2name(h, tid), pos, pos+1)) continue;
772 for (i = total_depth = 0; i < n; ++i) total_depth += n_plp[i];
773 group_smpl(&gplp, sm, &buf, n, fn, n_plp, plp, conf->flag & MPLP_IGNORE_RG);
774 _ref0 = (ref && pos < ref_len)? ref[pos] : 'N';
775 ref16 = seq_nt16_table[_ref0];
776 bcf_callaux_clean(bca, &bc);
777 for (i = 0; i < gplp.n; ++i)
778 bcf_call_glfgen(gplp.n_plp[i], gplp.plp[i], ref16, bca, bcr + i);
779 bc.tid = tid; bc.pos = pos;
780 bcf_call_combine(gplp.n, bcr, bca, ref16, &bc);
781 bcf_clear1(bcf_rec);
782 bcf_call2bcf(&bc, bcf_rec, bcr, conf->fmt_flag, 0, 0);
783 if (bcf_write1(bcf_fp, bcf_hdr, bcf_rec) != 0) {
784 print_error_errno("mpileup", "Failed to write VCF/BCF record to \"%s\"",
785 conf->output_fname?conf->output_fname:"standard output");
786 samtools_exit(EXIT_FAILURE);
787 }
788 // call indels; todo: subsampling with total_depth>max_indel_depth instead of ignoring?
789 if (!(conf->flag&MPLP_NO_INDEL) && total_depth < max_indel_depth && bcf_call_gap_prep(gplp.n, gplp.n_plp, gplp.plp, pos, bca, ref, rghash) >= 0)
790 {
791 bcf_callaux_clean(bca, &bc);
792 for (i = 0; i < gplp.n; ++i)
793 bcf_call_glfgen(gplp.n_plp[i], gplp.plp[i], -1, bca, bcr + i);
794 if (bcf_call_combine(gplp.n, bcr, bca, -1, &bc) >= 0) {
795 bcf_clear1(bcf_rec);
796 bcf_call2bcf(&bc, bcf_rec, bcr, conf->fmt_flag, bca, ref);
797 if (bcf_write1(bcf_fp, bcf_hdr, bcf_rec) != 0) {
798 print_error_errno("mpileup", "Failed to write VCF/BCF record to \"%s\"",
799 conf->output_fname?conf->output_fname:"standard output");
800 samtools_exit(EXIT_FAILURE);
801 }
802 }
803 }
804 } else {
805 if (conf->all) {
806 // Deal with missing portions of previous tids
807 while (tid > last_tid) {
808 if (last_tid >= 0 && !conf->reg) {
809 while (++last_pos < sam_hdr_tid2len(h, last_tid)) {
810 if (conf->bed && bed_overlap(conf->bed, sam_hdr_tid2name(h, last_tid), last_pos, last_pos + 1) == 0)
811 continue;
812 print_empty_pileup(pileup_fp, conf, sam_hdr_tid2name(h, last_tid), last_pos, n, ref, ref_len);
813 }
814 }
815 last_tid++;
816 last_pos = -1;
817 if (conf->all < 2)
818 break;
819 }
820 }
821 if (conf->all) {
822 // Deal with missing portion of current tid
823 while (++last_pos < pos) {
824 if (conf->reg && last_pos < beg0) continue; // out of range; skip
825 if (conf->bed && bed_overlap(conf->bed, sam_hdr_tid2name(h, tid), last_pos, last_pos + 1) == 0)
826 continue;
827 print_empty_pileup(pileup_fp, conf, sam_hdr_tid2name(h, tid), last_pos, n, ref, ref_len);
828 }
829 last_tid = tid;
830 last_pos = pos;
831 }
832 if (conf->bed && tid >= 0 && !bed_overlap(conf->bed, sam_hdr_tid2name(h, tid), pos, pos+1)) continue;
833
834 fprintf(pileup_fp, "%s\t%"PRIhts_pos"\t%c", sam_hdr_tid2name(h, tid), pos + 1, (ref && pos < ref_len)? ref[pos] : 'N');
835 for (i = 0; i < n; ++i) {
836 int j, cnt;
837 for (j = cnt = 0; j < n_plp[i]; ++j) {
838 const bam_pileup1_t *p = plp[i] + j;
839 int c = p->qpos < p->b->core.l_qseq
840 ? bam_get_qual(p->b)[p->qpos]
841 : 0;
842 if (c >= conf->min_baseQ) ++cnt;
843 }
844 fprintf(pileup_fp, "\t%d\t", cnt);
845 if (n_plp[i] == 0) {
846 fputs("*\t*", pileup_fp);
847 int flag_value = MPLP_PRINT_MAPQ_CHAR;
848 while(flag_value < MPLP_PRINT_LAST) {
849 if (flag_value != MPLP_PRINT_MODS
850 && (conf->flag & flag_value))
851 fputs("\t*", pileup_fp);
852 flag_value <<= 1;
853 }
854 if (conf->auxlist) {
855 int t = 0;
856 while(t++ < ((klist_t(auxlist) *)conf->auxlist)->size)
857 fputs("\t*", pileup_fp);
858 }
859 } else {
860 int n = 0;
861 kstring_t ks = KS_INITIALIZE;
862 for (j = 0; j < n_plp[i]; ++j) {
863 const bam_pileup1_t *p = plp[i] + j;
864 int c = p->qpos < p->b->core.l_qseq
865 ? bam_get_qual(p->b)[p->qpos]
866 : 0;
867 if (c >= conf->min_baseQ) {
868 n++;
869 if (pileup_seq(pileup_fp, plp[i] + j, pos, ref_len,
870 ref, &ks, conf->rev_del,
871 conf->no_ins, conf->no_ins_mods,
872 conf->no_del, conf->no_ends) < 0) {
873 ret = 1;
874 goto fail;
875 }
876 }
877 }
878 if (!n) putc('*', pileup_fp);
879
880 /* Print base qualities */
881 n = 0;
882 ks_free(&ks);
883 putc('\t', pileup_fp);
884 for (j = 0; j < n_plp[i]; ++j) {
885 const bam_pileup1_t *p = plp[i] + j;
886 int c = p->qpos < p->b->core.l_qseq
887 ? bam_get_qual(p->b)[p->qpos]
888 : 0;
889 if (c >= conf->min_baseQ) {
890 c = c + 33 < 126? c + 33 : 126;
891 putc(c, pileup_fp);
892 n++;
893 }
894 }
895 if (!n) putc('*', pileup_fp);
896
897 /* Print selected columns */
898 int flag_value = MPLP_PRINT_MAPQ_CHAR;
899 while(flag_value < MPLP_PRINT_LAST) {
900 if (flag_value != MPLP_PRINT_MODS
901 && (conf->flag & flag_value)) {
902 n = 0;
903 putc('\t', pileup_fp);
904 for (j = 0; j < n_plp[i]; ++j) {
905 const bam_pileup1_t *p = &plp[i][j];
906 int c = p->qpos < p->b->core.l_qseq
907 ? bam_get_qual(p->b)[p->qpos]
908 : 0;
909 if ( c < conf->min_baseQ ) continue;
910 if (n > 0 && flag_value != MPLP_PRINT_MAPQ_CHAR) putc(',', pileup_fp);
911 n++;
912
913 switch (flag_value) {
914 case MPLP_PRINT_MAPQ_CHAR:
915 c = p->b->core.qual + 33;
916 if (c > 126) c = 126;
917 putc(c, pileup_fp);
918 break;
919 case MPLP_PRINT_QPOS:
920 // query position in current orientation
921 fprintf(pileup_fp, "%d", p->qpos + 1);
922 break;
923 case MPLP_PRINT_QPOS5: {
924 // query position in 5' to 3' orientation
925 int pos5 = bam_is_rev(p->b)
926 ? p->b->core.l_qseq-p->qpos + p->is_del
927 : p->qpos + 1;
928 fprintf(pileup_fp, "%d", pos5);
929 break;
930 }
931 case MPLP_PRINT_QNAME:
932 fputs(bam_get_qname(p->b), pileup_fp);
933 break;
934 case MPLP_PRINT_FLAG:
935 fprintf(pileup_fp, "%d", p->b->core.flag);
936 break;
937 case MPLP_PRINT_RNAME:
938 if (p->b->core.tid >= 0)
939 fputs(sam_hdr_tid2name(h, p->b->core.tid), pileup_fp);
940 else
941 putc('*', pileup_fp);
942 break;
943 case MPLP_PRINT_POS:
944 fprintf(pileup_fp, "%"PRId64, (int64_t) p->b->core.pos + 1);
945 break;
946 case MPLP_PRINT_MAPQ:
947 fprintf(pileup_fp, "%d", p->b->core.qual);
948 break;
949 case MPLP_PRINT_RNEXT:
950 if (p->b->core.mtid >= 0)
951 fputs(sam_hdr_tid2name(h, p->b->core.mtid), pileup_fp);
952 else
953 putc('*', pileup_fp);
954 break;
955 case MPLP_PRINT_PNEXT:
956 fprintf(pileup_fp, "%"PRId64, (int64_t) p->b->core.mpos + 1);
957 break;
958 }
959 }
960 if (!n) putc('*', pileup_fp);
961 }
962 flag_value <<= 1;
963 }
964
965 /* Print selected tags */
966 klist_t(auxlist) *auxlist_p = ((klist_t(auxlist) *)conf->auxlist);
967 if (auxlist_p && auxlist_p->size) {
968 kliter_t(auxlist) *aux;
969 for (aux = kl_begin(auxlist_p); aux != kl_end(auxlist_p); aux = kl_next(aux)) {
970 n = 0;
971 putc('\t', pileup_fp);
972 for (j = 0; j < n_plp[i]; ++j) {
973 const bam_pileup1_t *p = &plp[i][j];
974 int c = p->qpos < p->b->core.l_qseq
975 ? bam_get_qual(p->b)[p->qpos]
976 : 0;
977 if ( c < conf->min_baseQ ) continue;
978
979 if (n > 0) putc(conf->sep, pileup_fp);
980 n++;
981 uint8_t* tag_u = bam_aux_get(p->b, kl_val(aux));
982 if (!tag_u) {
983 putc(conf->empty , pileup_fp);
984 continue;
985 }
986
987 /* Tag value is string */
988 if (*tag_u == 'Z' || *tag_u == 'H') {
989 char *tag_s = bam_aux2Z(tag_u);
990 if (!tag_s) continue;
991 fputs(tag_s, pileup_fp);
992 }
993
994 /* Tag value is integer */
995 if (*tag_u == 'I' || *tag_u == 'i' || *tag_u == 'C' || *tag_u == 'c' || *tag_u == 'S' || *tag_u == 's') {
996 int64_t tag_i = bam_aux2i(tag_u);
997 fprintf(pileup_fp, "%" PRId64 "", tag_i);
998 }
999
1000 /* Tag value is float */
1001 if (*tag_u == 'd' || *tag_u == 'f') {
1002 double tag_f = bam_aux2f(tag_u);
1003 fprintf(pileup_fp, "%lf", tag_f);
1004 }
1005
1006 /* Tag value is character */
1007 if (*tag_u == 'A') {
1008 char tag_c = bam_aux2A(tag_u);
1009 putc(tag_c, pileup_fp);
1010 }
1011 }
1012 if (!n) putc('*', pileup_fp);
1013 }
1014 }
1015 }
1016 }
1017 putc('\n', pileup_fp);
1018 }
1019 }
1020
1021 if (ret < 0) {
1022 print_error("mpileup", "error reading from input file");
1023 ret = EXIT_FAILURE;
1024 goto fail;
1025 }
1026
1027 if (conf->all && !(conf->flag & MPLP_BCF)) {
1028 // Handle terminating region
1029 if (last_tid < 0 && conf->reg && conf->all > 1) {
1030 last_tid = tid0;
1031 last_pos = beg0-1;
1032 mplp_get_ref(data[0], tid0, &ref, &ref_len);
1033 }
1034 while (last_tid >= 0 && last_tid < sam_hdr_nref(h)) {
1035 while (++last_pos < sam_hdr_tid2len(h, last_tid)) {
1036 if (last_pos >= end0) break;
1037 if (conf->bed && bed_overlap(conf->bed, sam_hdr_tid2name(h, last_tid), last_pos, last_pos + 1) == 0)
1038 continue;
1039 print_empty_pileup(pileup_fp, conf, sam_hdr_tid2name(h, last_tid), last_pos, n, ref, ref_len);
1040 }
1041 last_tid++;
1042 last_pos = -1;
1043 if (conf->all < 2 || conf->reg)
1044 break;
1045 }
1046 }
1047
1048 fail:
1049 // clean up
1050 free(bc.tmp.s);
1051 bcf_destroy1(bcf_rec);
1052 if (bcf_fp)
1053 {
1054 release_autoflush(bcf_fp);
1055 hts_close(bcf_fp);
1056 bcf_hdr_destroy(bcf_hdr);
1057 bcf_call_destroy(bca);
1058 free(bc.PL);
1059 free(bc.DP4);
1060 free(bc.ADR);
1061 free(bc.ADF);
1062 free(bc.fmt_arr);
1063 free(bcr);
1064 }
1065 if (pileup_fp && conf->output_fname) fclose(pileup_fp);
1066 bam_smpl_destroy(sm); free(buf.s);
1067 for (i = 0; i < gplp.n; ++i) free(gplp.plp[i]);
1068 free(gplp.plp); free(gplp.n_plp); free(gplp.m_plp);
1069 bcf_call_del_rghash(rghash);
1070 bam_mplp_destroy(iter);
1071 sam_hdr_destroy(h);
1072 for (i = 0; i < n; ++i) {
1073 sam_close(data[i]->fp);
1074 if (data[i]->iter) hts_itr_destroy(data[i]->iter);
1075 free(data[i]);
1076 }
1077 free(data); free(plp); free(n_plp);
1078 free(mp_ref.ref[0]);
1079 free(mp_ref.ref[1]);
1080 return ret;
1081 }
1082
is_url(const char * s)1083 static int is_url(const char *s)
1084 {
1085 static const char uri_scheme_chars[] =
1086 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+.-";
1087 return s[strspn(s, uri_scheme_chars)] == ':';
1088 }
1089
1090 #define MAX_PATH_LEN 1024
read_file_list(const char * file_list,int * n,char ** argv[])1091 int read_file_list(const char *file_list,int *n,char **argv[])
1092 {
1093 char buf[MAX_PATH_LEN];
1094 int len, nfiles = 0;
1095 char **files = NULL;
1096 struct stat sb;
1097
1098 *n = 0;
1099 *argv = NULL;
1100
1101 FILE *fh = fopen(file_list,"r");
1102 if ( !fh )
1103 {
1104 fprintf(samtools_stderr,"%s: %s\n", file_list,strerror(errno));
1105 return 1;
1106 }
1107
1108 files = calloc(nfiles,sizeof(char*));
1109 nfiles = 0;
1110 while ( fgets(buf,MAX_PATH_LEN,fh) )
1111 {
1112 // allow empty lines and trailing spaces
1113 len = strlen(buf);
1114 while ( len>0 && isspace(buf[len-1]) ) len--;
1115 if ( !len ) continue;
1116
1117 // check sanity of the file list
1118 buf[len] = 0;
1119 if (! (is_url(buf) || stat(buf, &sb) == 0))
1120 {
1121 // no such file, check if it is safe to print its name
1122 int i, safe_to_print = 1;
1123 for (i=0; i<len; i++)
1124 if (!isprint(buf[i])) { safe_to_print = 0; break; }
1125 if ( safe_to_print )
1126 fprintf(samtools_stderr,"The file list \"%s\" appears broken, could not locate: %s\n", file_list,buf);
1127 else
1128 fprintf(samtools_stderr,"Does the file \"%s\" really contain a list of files and do all exist?\n", file_list);
1129 return 1;
1130 }
1131
1132 nfiles++;
1133 files = realloc(files,nfiles*sizeof(char*));
1134 files[nfiles-1] = strdup(buf);
1135 }
1136 fclose(fh);
1137 if ( !nfiles )
1138 {
1139 fprintf(samtools_stderr,"No files read from %s\n", file_list);
1140 return 1;
1141 }
1142 *argv = files;
1143 *n = nfiles;
1144 return 0;
1145 }
1146 #undef MAX_PATH_LEN
1147
parse_format_flag(const char * str)1148 int parse_format_flag(const char *str)
1149 {
1150 int i, flag = 0, n_tags;
1151 char **tags = hts_readlist(str, 0, &n_tags);
1152 for(i=0; i<n_tags; i++)
1153 {
1154 if ( !strcasecmp(tags[i],"DP") ) flag |= B2B_FMT_DP;
1155 else if ( !strcasecmp(tags[i],"DV") ) { flag |= B2B_FMT_DV; fprintf(samtools_stderr, "[warning] tag DV functional, but deprecated. Please switch to `AD` in future.\n"); }
1156 else if ( !strcasecmp(tags[i],"SP") ) flag |= B2B_FMT_SP;
1157 else if ( !strcasecmp(tags[i],"DP4") ) { flag |= B2B_FMT_DP4; fprintf(samtools_stderr, "[warning] tag DP4 functional, but deprecated. Please switch to `ADF` and `ADR` in future.\n"); }
1158 else if ( !strcasecmp(tags[i],"DPR") ) { flag |= B2B_FMT_DPR; fprintf(samtools_stderr, "[warning] tag DPR functional, but deprecated. Please switch to `AD` in future.\n"); }
1159 else if ( !strcasecmp(tags[i],"INFO/DPR") ) { flag |= B2B_INFO_DPR; fprintf(samtools_stderr, "[warning] tag INFO/DPR functional, but deprecated. Please switch to `INFO/AD` in future.\n"); }
1160 else if ( !strcasecmp(tags[i],"AD") ) flag |= B2B_FMT_AD;
1161 else if ( !strcasecmp(tags[i],"ADF") ) flag |= B2B_FMT_ADF;
1162 else if ( !strcasecmp(tags[i],"ADR") ) flag |= B2B_FMT_ADR;
1163 else if ( !strcasecmp(tags[i],"INFO/AD") ) flag |= B2B_INFO_AD;
1164 else if ( !strcasecmp(tags[i],"INFO/ADF") ) flag |= B2B_INFO_ADF;
1165 else if ( !strcasecmp(tags[i],"INFO/ADR") ) flag |= B2B_INFO_ADR;
1166 else
1167 {
1168 fprintf(samtools_stderr,"Could not parse tag \"%s\" in \"%s\"\n", tags[i], str);
1169 samtools_exit(EXIT_FAILURE);
1170 }
1171 free(tags[i]);
1172 }
1173 if (n_tags) free(tags);
1174 return flag;
1175 }
1176
print_usage(FILE * fp,const mplp_conf_t * mplp)1177 static void print_usage(FILE *fp, const mplp_conf_t *mplp)
1178 {
1179 char *tmp_require = bam_flag2str(mplp->rflag_require);
1180 char *tmp_filter = bam_flag2str(mplp->rflag_filter);
1181
1182 // Display usage information, formatted for the standard 80 columns.
1183 // (The unusual string formatting here aids the readability of this
1184 // source code in 80 columns, to the extent that's possible.)
1185
1186 fprintf(fp,
1187 "\n"
1188 "Usage: samtools mpileup [options] in1.bam [in2.bam [...]]\n"
1189 "\n"
1190 "Input options:\n"
1191 " -6, --illumina1.3+ quality is in the Illumina-1.3+ encoding\n"
1192 " -A, --count-orphans do not discard anomalous read pairs\n"
1193 " -b, --bam-list FILE list of input BAM filenames, one per line\n"
1194 " -B, --no-BAQ disable BAQ (per-Base Alignment Quality)\n"
1195 " -C, --adjust-MQ INT adjust mapping quality; recommended:50, disable:0 [0]\n"
1196 " -d, --max-depth INT max per-file depth; avoids excessive memory usage [%d]\n", mplp->max_depth);
1197 fprintf(fp,
1198 " -E, --redo-BAQ recalculate BAQ on the fly, ignore existing BQs\n"
1199 " -f, --fasta-ref FILE faidx indexed reference sequence file\n"
1200 " -G, --exclude-RG FILE exclude read groups listed in FILE\n"
1201 " -l, --positions FILE skip unlisted positions (chr pos) or regions (BED)\n"
1202 " -q, --min-MQ INT skip alignments with mapQ smaller than INT [%d]\n", mplp->min_mq);
1203 fprintf(fp,
1204 " -Q, --min-BQ INT skip bases with baseQ/BAQ smaller than INT [%d]\n", mplp->min_baseQ);
1205 fprintf(fp,
1206 " -r, --region REG region in which pileup is generated\n"
1207 " -R, --ignore-RG ignore RG tags (one BAM = one sample)\n"
1208 " --rf, --incl-flags STR|INT required flags: include reads with any of the mask bits set [%s]\n", tmp_require);
1209 fprintf(fp,
1210 " --ff, --excl-flags STR|INT filter flags: skip reads with any of the mask bits set\n"
1211 " [%s]\n", tmp_filter);
1212 fprintf(fp,
1213 " -x, --ignore-overlaps disable read-pair overlap detection\n"
1214 " -X, --customized-index use customized index files\n" // -X flag for index filename
1215 "\n"
1216 "Output options:\n"
1217 " -o, --output FILE write output to FILE [standard output]\n"
1218 " -O, --output-BP output base positions on reads, current orientation\n"
1219 " --output-BP-5 output base positions on reads, 5' to 3' orientation\n"
1220 " -M, --output-mods output base modifications\n"
1221 " -s, --output-MQ output mapping quality\n"
1222 " --output-QNAME output read names\n"
1223 " --output-extra STR output extra read fields and read tag values\n"
1224 " --output-sep CHAR set the separator character for tag lists [,]\n"
1225 " --output-empty CHAR set the no value character for tag lists [*]\n"
1226 " --no-output-ins skip insertion sequence after +NUM\n"
1227 " Use twice for complete insertion removal\n"
1228 " --no-output-ins-mods don't display base modifications within insertions\n"
1229 " --no-output-del skip deletion sequence after -NUM\n"
1230 " Use twice for complete deletion removal\n"
1231 " --no-output-ends remove ^MQUAL and $ markup in sequence column\n"
1232 " --reverse-del use '#' character for deletions on the reverse strand\n"
1233 " -a output all positions (including zero depth)\n"
1234 " -a -a (or -aa) output absolutely all positions, including unused ref. sequences\n"
1235 "\n"
1236 "Generic options:\n");
1237 sam_global_opt_help(fp, "-.--.--.");
1238
1239 fprintf(fp, "\n"
1240 "Note that using \"samtools mpileup\" to generate BCF or VCF files is now\n"
1241 "deprecated. To output these formats, please use \"bcftools mpileup\" instead.\n");
1242
1243 free(tmp_require);
1244 free(tmp_filter);
1245 }
1246
deprecated(char opt)1247 static void deprecated(char opt) {
1248 fprintf(samtools_stderr, "[warning] samtools mpileup option `%c` is functional, "
1249 "but deprecated. Please switch to using bcftools mpileup in future.\n", opt);
1250 }
1251
bam_mpileup(int argc,char * argv[])1252 int bam_mpileup(int argc, char *argv[])
1253 {
1254 int c;
1255 const char *file_list = NULL;
1256 char **fn = NULL;
1257 int nfiles = 0, use_orphan = 0, has_index_file = 0;
1258 mplp_conf_t mplp;
1259 memset(&mplp, 0, sizeof(mplp_conf_t));
1260 mplp.min_baseQ = 13;
1261 mplp.capQ_thres = 0;
1262 mplp.max_depth = MPLP_MAX_DEPTH;
1263 mplp.max_indel_depth = MPLP_MAX_INDEL_DEPTH;
1264 mplp.openQ = 40; mplp.extQ = 20; mplp.tandemQ = 100;
1265 mplp.min_frac = 0.002; mplp.min_support = 1;
1266 mplp.flag = MPLP_NO_ORPHAN | MPLP_REALN | MPLP_SMART_OVERLAPS;
1267 mplp.argc = argc; mplp.argv = argv;
1268 mplp.rflag_filter = BAM_FUNMAP | BAM_FSECONDARY | BAM_FQCFAIL | BAM_FDUP;
1269 mplp.output_fname = NULL;
1270 mplp.all = 0;
1271 mplp.rev_del = 0;
1272 mplp.sep = ',';
1273 mplp.empty = '*';
1274 sam_global_args_init(&mplp.ga);
1275
1276 static const struct option lopts[] =
1277 {
1278 SAM_OPT_GLOBAL_OPTIONS('-', 0, '-', '-', 0, '-'),
1279 {"rf", required_argument, NULL, 1}, // require flag
1280 {"ff", required_argument, NULL, 2}, // filter flag
1281 {"incl-flags", required_argument, NULL, 1},
1282 {"excl-flags", required_argument, NULL, 2},
1283 {"output", required_argument, NULL, 3},
1284 {"open-prob", required_argument, NULL, 4},
1285 {"output-QNAME", no_argument, NULL, 5},
1286 {"output-qname", no_argument, NULL, 5},
1287 {"illumina1.3+", no_argument, NULL, '6'},
1288 {"count-orphans", no_argument, NULL, 'A'},
1289 {"bam-list", required_argument, NULL, 'b'},
1290 {"no-BAQ", no_argument, NULL, 'B'},
1291 {"no-baq", no_argument, NULL, 'B'},
1292 {"adjust-MQ", required_argument, NULL, 'C'},
1293 {"adjust-mq", required_argument, NULL, 'C'},
1294 {"max-depth", required_argument, NULL, 'd'},
1295 {"redo-BAQ", no_argument, NULL, 'E'},
1296 {"redo-baq", no_argument, NULL, 'E'},
1297 {"fasta-ref", required_argument, NULL, 'f'},
1298 {"exclude-RG", required_argument, NULL, 'G'},
1299 {"exclude-rg", required_argument, NULL, 'G'},
1300 {"positions", required_argument, NULL, 'l'},
1301 {"region", required_argument, NULL, 'r'},
1302 {"ignore-RG", no_argument, NULL, 'R'},
1303 {"ignore-rg", no_argument, NULL, 'R'},
1304 {"min-MQ", required_argument, NULL, 'q'},
1305 {"min-mq", required_argument, NULL, 'q'},
1306 {"min-BQ", required_argument, NULL, 'Q'},
1307 {"min-bq", required_argument, NULL, 'Q'},
1308 {"ignore-overlaps", no_argument, NULL, 'x'},
1309 {"BCF", no_argument, NULL, 'g'},
1310 {"bcf", no_argument, NULL, 'g'},
1311 {"VCF", no_argument, NULL, 'v'},
1312 {"vcf", no_argument, NULL, 'v'},
1313 {"output-mods", no_argument, NULL, 'M'},
1314 {"output-BP", no_argument, NULL, 'O'},
1315 {"output-bp", no_argument, NULL, 'O'},
1316 {"output-BP-5", no_argument, NULL, 14},
1317 {"output-bp-5", no_argument, NULL, 14},
1318 {"output-MQ", no_argument, NULL, 's'},
1319 {"output-mq", no_argument, NULL, 's'},
1320 {"output-tags", required_argument, NULL, 't'},
1321 {"uncompressed", no_argument, NULL, 'u'},
1322 {"ext-prob", required_argument, NULL, 'e'},
1323 {"gap-frac", required_argument, NULL, 'F'},
1324 {"tandem-qual", required_argument, NULL, 'h'},
1325 {"skip-indels", no_argument, NULL, 'I'},
1326 {"max-idepth", required_argument, NULL, 'L'},
1327 {"min-ireads ", required_argument, NULL, 'm'},
1328 {"per-sample-mF", no_argument, NULL, 'p'},
1329 {"per-sample-mf", no_argument, NULL, 'p'},
1330 {"platforms", required_argument, NULL, 'P'},
1331 {"customized-index", no_argument, NULL, 'X'},
1332 {"reverse-del", no_argument, NULL, 6},
1333 {"output-extra", required_argument, NULL, 7},
1334 {"output-sep", required_argument, NULL, 8},
1335 {"output-empty", required_argument, NULL, 9},
1336 {"no-output-ins", no_argument, NULL, 10},
1337 {"no-output-ins-mods", no_argument, NULL, 11},
1338 {"no-output-del", no_argument, NULL, 12},
1339 {"no-output-ends", no_argument, NULL, 13},
1340 {NULL, 0, NULL, 0}
1341 };
1342
1343 while ((c = getopt_long(argc, argv, "Agf:r:l:q:Q:uRC:BDSd:L:b:P:po:e:h:Im:F:EG:6OsVvxXt:aM",lopts,NULL)) >= 0) {
1344 switch (c) {
1345 case 'x': mplp.flag &= ~MPLP_SMART_OVERLAPS; break;
1346 case 1 :
1347 mplp.rflag_require = bam_str2flag(optarg);
1348 if ( mplp.rflag_require<0 ) { fprintf(samtools_stderr,"Could not parse --rf %s\n", optarg); return 1; }
1349 break;
1350 case 2 :
1351 mplp.rflag_filter = bam_str2flag(optarg);
1352 if ( mplp.rflag_filter<0 ) { fprintf(samtools_stderr,"Could not parse --ff %s\n", optarg); return 1; }
1353 break;
1354 case 3 : mplp.output_fname = optarg; break;
1355 case 4 : mplp.openQ = atoi(optarg); break;
1356 case 5 : mplp.flag |= MPLP_PRINT_QNAME; break;
1357 case 6 : mplp.rev_del = 1; break;
1358 case 7 :
1359 if (build_auxlist(&mplp, optarg) != 0) {
1360 fprintf(samtools_stderr,"Could not build aux list using '%s'\n", optarg);
1361 return 1;
1362 }
1363 break;
1364 case 8: mplp.sep = optarg[0]; break;
1365 case 9: mplp.empty = optarg[0]; break;
1366 case 10: mplp.no_ins++; break;
1367 case 11: mplp.no_ins_mods = 1; break;
1368 case 12: mplp.no_del++; break;
1369 case 13: mplp.no_ends = 1; break;
1370 case 'f':
1371 mplp.fai = fai_load(optarg);
1372 if (mplp.fai == NULL) return 1;
1373 mplp.fai_fname = optarg;
1374 break;
1375 case 'd': mplp.max_depth = atoi(optarg); break;
1376 case 'r': mplp.reg = strdup(optarg); break;
1377 case 'l':
1378 // In the original version the whole BAM was streamed which is inefficient
1379 // with few BED intervals and big BAMs. Todo: devise a heuristic to determine
1380 // best strategy, that is streaming or jumping.
1381 mplp.bed = bed_read(optarg);
1382 if (!mplp.bed) { print_error_errno("mpileup", "Could not read file \"%s\"", optarg); return 1; }
1383 break;
1384 case 'P': mplp.pl_list = strdup(optarg); deprecated(c); break;
1385 case 'p': mplp.flag |= MPLP_PER_SAMPLE; deprecated(c); break;
1386 case 'g': mplp.flag |= MPLP_BCF; deprecated(c); break;
1387 case 'v': mplp.flag |= MPLP_BCF | MPLP_VCF; deprecated(c); break;
1388 case 'u': mplp.flag |= MPLP_NO_COMP | MPLP_BCF; deprecated(c); break;
1389 case 'B': mplp.flag &= ~MPLP_REALN; break;
1390 case 'X': has_index_file = 1; break;
1391 case 'D': mplp.fmt_flag |= B2B_FMT_DP; deprecated(c); break;
1392 case 'S': mplp.fmt_flag |= B2B_FMT_SP; deprecated(c); break;
1393 case 'V': mplp.fmt_flag |= B2B_FMT_DV; deprecated(c); break;
1394 case 'I': mplp.flag |= MPLP_NO_INDEL; deprecated(c); break;
1395 case 'E': mplp.flag |= MPLP_REDO_BAQ; break;
1396 case '6': mplp.flag |= MPLP_ILLUMINA13; break;
1397 case 'R': mplp.flag |= MPLP_IGNORE_RG; break;
1398 case 's': mplp.flag |= MPLP_PRINT_MAPQ_CHAR; break;
1399 case 'O':
1400 if (!(mplp.flag & MPLP_PRINT_QPOS5))
1401 mplp.flag |= MPLP_PRINT_QPOS;
1402 break;
1403 case 14:
1404 mplp.flag |= MPLP_PRINT_QPOS5;
1405 mplp.flag &= ~MPLP_PRINT_QPOS;
1406 break;
1407 case 'M': mplp.flag |= MPLP_PRINT_MODS; break;
1408 case 'C': mplp.capQ_thres = atoi(optarg); break;
1409 case 'q': mplp.min_mq = atoi(optarg); break;
1410 case 'Q': mplp.min_baseQ = atoi(optarg); break;
1411 case 'b': file_list = optarg; break;
1412 case 'o': {
1413 char *end;
1414 long value = strtol(optarg, &end, 10);
1415 // Distinguish between -o INT and -o FILE (a bit of a hack!)
1416 if (*end == '\0') {
1417 mplp.openQ = value;
1418 fprintf(samtools_stderr, "[warning] samtools mpileup option "
1419 "'--open-prob INT' is functional, but deprecated. "
1420 "Please switch to using bcftools mpileup in future.\n");
1421 } else {
1422 mplp.output_fname = optarg;
1423 }
1424 }
1425 break;
1426 case 'e': mplp.extQ = atoi(optarg); deprecated(c); break;
1427 case 'h': mplp.tandemQ = atoi(optarg); deprecated(c); break;
1428 case 'A': use_orphan = 1; break;
1429 case 'F': mplp.min_frac = atof(optarg); deprecated(c); break;
1430 case 'm': mplp.min_support = atoi(optarg); deprecated(c); break;
1431 case 'L': mplp.max_indel_depth = atoi(optarg); deprecated(c); break;
1432 case 'G': {
1433 FILE *fp_rg;
1434 char buf[1024];
1435 mplp.rghash = khash_str2int_init();
1436 if ((fp_rg = fopen(optarg, "r")) == NULL)
1437 fprintf(samtools_stderr, "[%s] Fail to open file %s. Continue anyway.\n", __func__, optarg);
1438 while (!feof(fp_rg) && fscanf(fp_rg, "%s", buf) > 0) // this is not a good style, but forgive me...
1439 khash_str2int_inc(mplp.rghash, strdup(buf));
1440 fclose(fp_rg);
1441 }
1442 break;
1443 case 't': mplp.fmt_flag |= parse_format_flag(optarg); deprecated(c); break;
1444 case 'a': mplp.all++; break;
1445 default:
1446 if (parse_sam_global_opt(c, optarg, lopts, &mplp.ga) == 0) break;
1447 /* else fall-through */
1448 case '?':
1449 print_usage(samtools_stderr, &mplp);
1450 return 1;
1451 }
1452 }
1453 if (!mplp.fai && mplp.ga.reference) {
1454 mplp.fai_fname = mplp.ga.reference;
1455 mplp.fai = fai_load(mplp.fai_fname);
1456 if (mplp.fai == NULL) return 1;
1457 }
1458
1459 if ( !(mplp.flag&MPLP_REALN) && mplp.flag&MPLP_REDO_BAQ )
1460 {
1461 fprintf(samtools_stderr,"Error: The -B option cannot be combined with -E\n");
1462 return 1;
1463 }
1464 if (use_orphan) mplp.flag &= ~MPLP_NO_ORPHAN;
1465 if (argc == 1)
1466 {
1467 print_usage(samtools_stderr, &mplp);
1468 return 1;
1469 }
1470 int ret;
1471 if (file_list) {
1472 if (has_index_file) {
1473 fprintf(samtools_stderr,"Error: The -b option cannot be combined with -X\n"); // No customize index loc in file list mode
1474 return 1;
1475 }
1476 if ( read_file_list(file_list,&nfiles,&fn) ) return 1;
1477 ret = mpileup(&mplp,nfiles,fn,NULL);
1478 for (c=0; c<nfiles; c++) free(fn[c]);
1479 free(fn);
1480 }
1481 else {
1482 if (has_index_file) {
1483 if ((argc - optind)%2 !=0) { // Calculate # of input BAM files
1484 fprintf(samtools_stderr, "Odd number of filenames detected! Each BAM file should have an index file\n");
1485 return 1;
1486 }
1487 nfiles = (argc - optind)/2;
1488 ret = mpileup(&mplp, nfiles, argv + optind, argv + nfiles + optind);
1489 } else {
1490 nfiles = argc - optind;
1491 ret = mpileup(&mplp, nfiles, argv + optind, NULL);
1492 }
1493 }
1494 if (mplp.rghash) khash_str2int_destroy_free(mplp.rghash);
1495 free(mplp.reg); free(mplp.pl_list);
1496 if (mplp.fai) fai_destroy(mplp.fai);
1497 if (mplp.bed) bed_destroy(mplp.bed);
1498 if (mplp.auxlist) kl_destroy(auxlist, (klist_t(auxlist) *)mplp.auxlist);
1499 return ret;
1500 }
1501