1 /* test/pileup.c -- simple pileup tester
2
3 Copyright (C) 2014,2018-2019 Genome Research Ltd.
4
5 Author: James Bonfield <jkb@sanger.ac.uk>
6
7 Permission is hereby granted, free of charge, to any person obtaining a copy
8 of this software and associated documentation files (the "Software"), to deal
9 in the Software without restriction, including without limitation the rights
10 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 copies of the Software, and to permit persons to whom the Software is
12 furnished to do so, subject to the following conditions:
13
14 The above copyright notice and this permission notice shall be included in
15 all copies or substantial portions of the Software.
16
17 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 DEALINGS IN THE SOFTWARE. */
24
25 /*
26 The output from this program isn't quite the same as that from
27 `samtools mpileup`. It doesn't print the reference base column,
28 it puts brackets around insertion sequences to make them easier to spot
29 and it writes empty brackets after a reported deletion.
30
31 The output from `samtools mpileup` can be converted to the same format like
32 this:
33
34 samtools mpileup -B -Q 0 in.bam | perl -lane \
35 'pop(@F);
36 splice(@F, 2, 1);
37 $F[3] =~ s/\+(\d+)([ACGTN]+)/sprintf("+%d(%s)%s",$1,substr($2,0,$1),substr($2,$1))/ieg;
38 $F[3] =~ s/\-(\d+)([ACGTN]+)/sprintf("-%d()%s",$1,substr($2,$1))/ieg;
39 print join("\t", @F);'
40
41 */
42
43 #include <config.h>
44
45 #include <stdio.h>
46 #include <string.h>
47 #include <errno.h>
48 #include <ctype.h>
49 #include <unistd.h>
50
51 #include "../htslib/sam.h"
52 #include "../htslib/kstring.h"
53
54 #define MIN(a,b) ((a)<(b)?(a):(b))
55
56 typedef struct ptest_t {
57 const char *fname;
58 samFile *fp;
59 sam_hdr_t *fp_hdr;
60 } ptest_t;
61
readaln(void * data,bam1_t * b)62 static int readaln(void *data, bam1_t *b) {
63 ptest_t *g = (ptest_t*)data;
64 int ret;
65
66 while (1) {
67 ret = sam_read1(g->fp, g->fp_hdr, b);
68 if (ret < 0) break;
69 if ( b->core.flag & (BAM_FUNMAP | BAM_FSECONDARY | BAM_FQCFAIL | BAM_FDUP) ) continue;
70 break;
71 }
72
73 return ret;
74 }
75
print_pileup_seq(const bam_pileup1_t * p,int n)76 static int print_pileup_seq(const bam_pileup1_t *p, int n) {
77 kstring_t ks = { 0, 0, NULL };
78 int i;
79
80 for (i = 0; i < n; i++, p++) {
81 uint8_t *seq = bam_get_seq(p->b);
82 int del_len, is_rev = bam_is_rev(p->b);
83
84 if (p->is_head)
85 putchar('^'), putchar('!'+MIN(p->b->core.qual,93));
86
87 if (p->is_del)
88 putchar(p->is_refskip ? (is_rev ? '<' : '>') : '*');
89 else {
90 unsigned char c = seq_nt16_str[bam_seqi(seq, p->qpos)];
91 putchar(is_rev ? tolower(c) : toupper(c));
92 }
93
94 del_len = -p->indel;
95 if (p->indel > 0) {
96 int j, len = bam_plp_insertion(p, &ks, &del_len);
97 if (len < 0) {
98 perror("bam_plp_insertion");
99 goto fail;
100 }
101 printf("%+d(", len);
102 for (j = 0; j < len; j++)
103 putchar(is_rev ?
104 tolower((uint8_t) ks.s[j]) :
105 toupper((uint8_t) ks.s[j]));
106 putchar(')');
107 }
108 if (del_len > 0) {
109 printf("-%d()", del_len);
110 }
111 if (p->is_tail)
112 putchar('$');
113 }
114 free(ks.s);
115 return 0;
116
117 fail:
118 free(ks.s);
119 return -1;
120 }
121
test_pileup(ptest_t * input)122 static int test_pileup(ptest_t *input) {
123 bam_plp_t plp = NULL;
124 const bam_pileup1_t *p;
125 int tid, pos, n = 0;
126
127 plp = bam_plp_init(readaln, input);
128 if (!plp) {
129 perror("bam_plp_init");
130 goto fail;
131 }
132 while ((p = bam_plp_auto(plp, &tid, &pos, &n)) != 0) {
133 if (tid < 0) break;
134 if (tid >= input->fp_hdr->n_targets) {
135 fprintf(stderr,
136 "bam_plp_auto returned tid %d >= header n_targets %d\n",
137 tid, input->fp_hdr->n_targets);
138 goto fail;
139 }
140
141 printf("%s\t%d\t%d\t", input->fp_hdr->target_name[tid], pos+1, n);
142
143 if (print_pileup_seq(p, n) < 0)
144 goto fail;
145
146 putchar('\n');
147 }
148 if (n < 0) {
149 fprintf(stderr, "bam_plp_auto failed for \"%s\"\n", input->fname);
150 goto fail;
151 }
152
153 bam_plp_destroy(plp);
154 return 0;
155
156 fail:
157 bam_plp_destroy(plp);
158 return -1;
159 }
160
test_mpileup(ptest_t * input)161 static int test_mpileup(ptest_t *input) {
162 bam_mplp_t iter = NULL;
163 const bam_pileup1_t *pileups[1] = { NULL };
164 int n_plp[1] = { 0 };
165 int tid, pos, n = 0;
166
167 iter = bam_mplp_init(1, readaln, (void **) &input);
168 if (!iter) {
169 perror("bam_plp_init");
170 goto fail;
171 }
172 if (bam_mplp_init_overlaps(iter) < 0) {
173 perror("bam_mplp_init_overlaps");
174 goto fail;
175 }
176
177 while ((n = bam_mplp_auto(iter, &tid, &pos, n_plp, pileups)) > 0) {
178 if (tid < 0) break;
179 if (tid >= input->fp_hdr->n_targets) {
180 fprintf(stderr,
181 "bam_mplp_auto returned tid %d >= header n_targets %d\n",
182 tid, input->fp_hdr->n_targets);
183 goto fail;
184 }
185
186 printf("%s\t%d\t%d\t", input->fp_hdr->target_name[tid], pos+1, n_plp[0]);
187
188 if (print_pileup_seq(pileups[0], n_plp[0]) < 0)
189 goto fail;
190
191 putchar('\n');
192 }
193 if (n < 0) {
194 fprintf(stderr, "bam_plp_auto failed for \"%s\"\n", input->fname);
195 goto fail;
196 }
197
198 bam_mplp_destroy(iter);
199 return 0;
200
201 fail:
202 bam_mplp_destroy(iter);
203 return -1;
204 }
205
main(int argc,char ** argv)206 int main(int argc, char **argv) {
207 ptest_t g = { NULL, NULL, NULL };
208 int use_mpileup = 0, opt;
209
210 while ((opt = getopt(argc, argv, "m")) != -1) {
211 switch (opt) {
212 case 'm':
213 use_mpileup = 1;
214 break;
215 default:
216 fprintf(stderr, "Usage: %s [-m] <sorted.sam>\n", argv[0]);
217 return EXIT_FAILURE;
218 }
219 }
220
221 if (optind >= argc) {
222 fprintf(stderr, "Usage: %s [-m] <sorted.sam>\n", argv[0]);
223 return EXIT_FAILURE;
224 }
225
226 g.fname = argv[optind];
227 g.fp = sam_open(g.fname, "r");
228 if (!g.fp) {
229 fprintf(stderr, "Couldn't open \"%s\" : %s", g.fname, strerror(errno));
230 goto fail;
231 }
232 g.fp_hdr = sam_hdr_read(g.fp);
233 if (!g.fp_hdr) {
234 fprintf(stderr, "Couldn't read header from \"%s\" : %s",
235 g.fname, strerror(errno));
236 goto fail;
237 }
238
239 if (use_mpileup) {
240 if (test_mpileup(&g) < 0)
241 goto fail;
242 } else {
243 if (test_pileup(&g) < 0)
244 goto fail;
245 }
246
247 sam_hdr_destroy(g.fp_hdr);
248 sam_close(g.fp);
249
250 return EXIT_SUCCESS;
251
252 fail:
253 if (g.fp_hdr) sam_hdr_destroy(g.fp_hdr);
254 if (g.fp) sam_close(g.fp);
255 return EXIT_FAILURE;
256 }
257