1 /*
2 Copyright (C) 2018-2019 Genome Research Ltd.
3
4 Author: James Bonfield <jkb@sanger.ac.uk>
5
6 Permission is hereby granted, free of charge, to any person obtaining a copy
7 of this software and associated documentation files (the "Software"), to deal
8 in the Software without restriction, including without limitation the rights
9 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 copies of the Software, and to permit persons to whom the Software is
11 furnished to do so, subject to the following conditions:
12
13 The above copyright notice and this permission notice shall be included in
14 all copies or substantial portions of the Software.
15
16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 THE SOFTWARE.
23 */
24
/*
Test region description parser.
Usage: test-parse-reg [-c] file.bam region
       test-parse-reg [-c] -m file.bam region,region...
       test-parse-reg -t file.bam

-c  means chr:pos is a single base coordinate, i.e. chr:pos-pos;
    otherwise it is chr:pos-<end>.
-m  is multi-region list.
-t  runs built-in tests against the header of file.bam.

./test/test-parse-reg -c -m test/colons.bam "{chr1:100-200},{chr1}:100-200,{chr1:100-200}:100,{chr1,chr3},chr1:"
*/
38
39 #include <config.h>
40 #include <stdio.h>
41 #include <string.h>
42 #include <stdlib.h>
43 #include <limits.h>
44 #include <stdint.h>
45 #include <inttypes.h>
46
47 #include "../htslib/hts.h"
48 #include "../htslib/sam.h"
49
/*
 * Parse "reg" with sam_parse_region() and compare the outcome with the
 * expected one.
 *
 * hdr      header passed through to sam_parse_region()
 * reg      region string to parse
 * flags    parser flags passed through to sam_parse_region()
 * reg_exp  expected return string, or NULL if parsing is expected to fail
 * tid_exp, beg_exp, end_exp
 *          expected tid / begin / end; only checked when reg_exp is non-NULL
 *
 * Returns normally when the result matches.  On any mismatch a description
 * of the expected and actual results is written to stderr and the process
 * exits with status 1.
 */
void reg_expected(sam_hdr_t *hdr, const char *reg, int flags,
                  char *reg_exp, int tid_exp, hts_pos_t beg_exp, hts_pos_t end_exp) {
    int tid_out = -1;
    hts_pos_t beg_out = -1, end_out = -1;
    const char *reg_out =
        sam_parse_region(hdr, reg, &tid_out, &beg_out, &end_out, flags);

    int matched;
    if (reg_exp == NULL) {
        // Failure expected: only the NULL return matters, the outputs don't.
        matched = (reg_out == NULL);
    } else {
        // Success expected: remainder string and all three outputs must agree.
        matched = reg_out != NULL
            && strcmp(reg_out, reg_exp) == 0
            && tid_out == tid_exp
            && beg_out == beg_exp
            && end_out == end_exp;
    }

    if (matched)
        return;

    fprintf(stderr, "Parsing \"%s\" expected return \"%s\", %d:%"PRIhts_pos"-%"PRIhts_pos", "
            "but got \"%s\", %d:%"PRIhts_pos"-%"PRIhts_pos"\n",
            reg,
            reg_exp ? reg_exp : "(null)", tid_exp, beg_exp, end_exp,
            reg_out ? reg_out : "(null)", tid_out, beg_out, end_out);
    exit(1);
}
71
reg_test(char * fn)72 int reg_test(char *fn) {
73 samFile *fp;
74 sam_hdr_t *hdr;
75
76 if (!(fp = sam_open(fn, "r")))
77 return 1;
78
79 if (!(hdr = sam_hdr_read(fp)))
80 return 1;
81
82 // 0 chr1
83 // 1 chr1:100
84 // 2 chr1:100-200
85 // 3 chr2:100-200
86 // 4 chr3
87 // 5 chr1,chr3
88
89 // Check range extensions.
90 reg_expected(hdr, "chr1", 0, "", 0, 0, HTS_POS_MAX);
91 reg_expected(hdr, "chr1:50", 0, "", 0, 49, HTS_POS_MAX);
92 reg_expected(hdr, "chr1:50", HTS_PARSE_ONE_COORD, "", 0, 49, 50);
93 reg_expected(hdr, "chr1:50-100", 0, "", 0, 49, 100);
94 reg_expected(hdr, "chr1:50-", 0, "", 0, 49, HTS_POS_MAX);
95 reg_expected(hdr, "chr1:-50", 0, "", 0, 0, 50);
96
97 // Check quoting
98 fprintf(stderr, "Expected error: ");
99 reg_expected(hdr, "chr1:100-200", 0, NULL, 0, 0, 0); // ambiguous
100 reg_expected(hdr, "{chr1}:100-200", 0, "", 0, 99, 200);
101 reg_expected(hdr, "{chr1:100-200}", 0, "", 2, 0, HTS_POS_MAX);
102 reg_expected(hdr, "{chr1:100-200}:100-200", 0, "", 2, 99, 200);
103 reg_expected(hdr, "{chr2:100-200}:100-200", 0, "", 3, 99, 200);
104 reg_expected(hdr, "chr2:100-200:100-200", 0, "", 3, 99, 200);
105 reg_expected(hdr, "chr2:100-200", 0, "", 3, 0, HTS_POS_MAX);
106
107 // Check numerics
108 reg_expected(hdr, "chr3", 0, "", 4, 0, HTS_POS_MAX);
109 reg_expected(hdr, "chr3:", 0, "", 4, 0, HTS_POS_MAX);
110 reg_expected(hdr, "chr3:1000-1500", 0, "", 4, 999, 1500);
111 reg_expected(hdr, "chr3:1,000-1,500", 0, "", 4, 999, 1500);
112 reg_expected(hdr, "chr3:1k-1.5K", 0, "", 4, 999, 1500);
113 reg_expected(hdr, "chr3:1e3-1.5e3", 0, "", 4, 999, 1500);
114 reg_expected(hdr, "chr3:1e3-15e2", 0, "", 4, 999, 1500);
115
116 // Check list mode
117 reg_expected(hdr, "chr1,chr3", HTS_PARSE_LIST, "chr3", 0, 0, HTS_POS_MAX);
118 fprintf(stderr, "Expected error: ");
119 reg_expected(hdr, "chr1:100-200,chr3", HTS_PARSE_LIST, NULL, 0, 0, 0); // ambiguous
120 reg_expected(hdr, "{chr1,chr3}", HTS_PARSE_LIST, "", 5, 0, HTS_POS_MAX);
121 reg_expected(hdr, "{chr1,chr3},chr1", HTS_PARSE_LIST, "chr1", 5, 0, HTS_POS_MAX);
122 // incorrect usage; first reg is valid (but not what user expects).
123 reg_expected(hdr, "chr3:1,000-1,500", HTS_PARSE_LIST | HTS_PARSE_ONE_COORD, "000-1,500", 4, 0, 1);
124
125 // More expected failures
126 reg_expected(hdr, "chr2", 0, NULL, 0, 0, 0);
127 reg_expected(hdr, "chr1,", 0, NULL, 0, 0, 0);
128 fprintf(stderr, "Expected error: ");
129 reg_expected(hdr, "{chr1", 0, NULL, 0, 0, 0);
130 reg_expected(hdr, "chr1:10-10", 0, "", 0, 9, 10); // OK
131 reg_expected(hdr, "chr1:10-9", 0, NULL, 0, 0, 0); // Issue#353
132 fprintf(stderr, "Expected error: ");
133 reg_expected(hdr, "chr1:x", 0, NULL, 0, 0, 0);
134 fprintf(stderr, "Expected error: ");
135 reg_expected(hdr, "chr1:1-y", 0, NULL, 0, 0, 0);
136 fprintf(stderr, "Expected error: ");
137 reg_expected(hdr, "chr1:1,chr3", 0, NULL, 0, 0, 0);
138
139 sam_hdr_destroy(hdr);
140 sam_close(fp);
141
142 exit(0);
143 }
144
main(int argc,char ** argv)145 int main(int argc, char **argv) {
146 sam_hdr_t *hdr;
147 samFile *fp;
148 int flags = 0;
149
150 while (argc > 1) {
151 if (strcmp(argv[1], "-m") == 0) {
152 flags |= HTS_PARSE_LIST;
153 argc--; argv++;
154 continue;
155 }
156
157 if (strcmp(argv[1], "-c") == 0) {
158 flags |= HTS_PARSE_ONE_COORD;
159 argc--; argv++;
160 continue;
161 }
162
163 // Automatic mode for test harness
164 if (strcmp(argv[1], "-t") == 0)
165 reg_test(argv[2]);
166
167 break;
168 }
169
170 // Interactive mode for debugging
171 if (argc != 3) {
172 fprintf(stderr, "Usage: test-parse-reg [-m] [-c] region[,region]...\n");
173 exit(1);
174 }
175
176 if (!(fp = sam_open(argv[1], "r"))) {
177 perror(argv[1]);
178 exit(1);
179 }
180
181 if (!(hdr = sam_hdr_read(fp))) {
182 fprintf(stderr, "Couldn't read header\n");
183 exit(1);
184 }
185
186 const char *reg = argv[2];
187 while (*reg) {
188 int tid;
189 hts_pos_t beg, end;
190 reg = sam_parse_region(hdr, reg, &tid, &beg, &end, flags);
191 if (!reg) {
192 fprintf(stderr, "Failed to parse region\n");
193 exit(1);
194 }
195 printf("%-20s %12"PRIhts_pos" %12"PRIhts_pos"\n",
196 tid == -1 ? "*" : hdr->target_name[tid],
197 beg, end);
198 }
199
200 sam_hdr_destroy(hdr);
201 sam_close(fp);
202
203 return 0;
204 }
205