1 /*
2     Copyright (C) 2018-2019 Genome Research Ltd.
3 
4     Author: James Bonfield <jkb@sanger.ac.uk>
5 
6     Permission is hereby granted, free of charge, to any person obtaining a copy
7     of this software and associated documentation files (the "Software"), to deal
8     in the Software without restriction, including without limitation the rights
9     to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10     copies of the Software, and to permit persons to whom the Software is
11     furnished to do so, subject to the following conditions:
12 
13     The above copyright notice and this permission notice shall be included in
14     all copies or substantial portions of the Software.
15 
16     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17     IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18     FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19     AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20     LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21     OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22     THE SOFTWARE.
23 */
24 
25 /*
26     Test region description parser.
27     Usage: test-parse-reg [-c] file.bam region
28            test-parse-reg [-c] -m file.bam region,region...
           test-parse-reg -t file.bam
30 
    -c treats chr:pos as a single base coordinate, i.e. chr:pos-pos;
       otherwise it is chr:pos-<end>
33     -m is multi-region list.
34     -t runs built-in tests
35 
36     ./test/test-parse-reg -c -m test/colons.bam "{chr1:100-200},{chr1}:100-200,{chr1:100-200}:100,{chr1,chr3},chr1:"
37 */
38 
39 #include <config.h>
40 #include <stdio.h>
41 #include <string.h>
42 #include <stdlib.h>
43 #include <limits.h>
44 #include <stdint.h>
45 #include <inttypes.h>
46 
47 #include "../htslib/hts.h"
48 #include "../htslib/sam.h"
49 
/*
 * Parses "reg" with sam_parse_region() and compares the outcome with the
 * expected values.
 *
 * hdr      Header used to resolve reference names.
 * reg      Region string to parse.
 * flags    Flags passed straight through to sam_parse_region().
 * reg_exp  Expected return string, or NULL if the parse is expected to fail.
 * tid_exp, beg_exp, end_exp
 *          Expected tid / begin / end.  Only checked when reg_exp is
 *          non-NULL.
 *
 * Returns normally when everything matches.  On any mismatch a diagnostic
 * is written to stderr and the process exits with status 1.
 */
void reg_expected(sam_hdr_t *hdr, const char *reg, int flags,
                 char *reg_exp, int tid_exp, hts_pos_t beg_exp, hts_pos_t end_exp) {
    int got_tid = -1;
    hts_pos_t got_beg = -1, got_end = -1;
    const char *got_reg =
        sam_parse_region(hdr, reg, &got_tid, &got_beg, &got_end, flags);
    int matched;

    if (reg_exp == NULL) {
        // A failure was expected; the coordinates are not inspected.
        matched = (got_reg == NULL);
    } else {
        // A success was expected; every returned field has to agree.
        matched = got_reg != NULL
            && strcmp(got_reg, reg_exp) == 0
            && got_tid == tid_exp
            && got_beg == beg_exp
            && got_end == end_exp;
    }

    if (matched)
        return;

    fprintf(stderr, "Parsing \"%s\" expected return \"%s\", %d:%"PRIhts_pos"-%"PRIhts_pos", "
            "but got \"%s\", %d:%"PRIhts_pos"-%"PRIhts_pos"\n",
            reg,
            reg_exp ? reg_exp : "(null)", tid_exp, beg_exp, end_exp,
            got_reg ? got_reg : "(null)", got_tid, got_beg, got_end);
    exit(1);
}
71 
reg_test(char * fn)72 int reg_test(char *fn) {
73     samFile *fp;
74     sam_hdr_t *hdr;
75 
76     if (!(fp = sam_open(fn, "r")))
77         return 1;
78 
79     if (!(hdr = sam_hdr_read(fp)))
80         return 1;
81 
82     // 0 chr1
83     // 1 chr1:100
84     // 2 chr1:100-200
85     // 3 chr2:100-200
86     // 4 chr3
87     // 5 chr1,chr3
88 
89     // Check range extensions.
90     reg_expected(hdr, "chr1", 0, "",  0, 0, HTS_POS_MAX);
91     reg_expected(hdr, "chr1:50", 0, "",  0, 49, HTS_POS_MAX);
92     reg_expected(hdr, "chr1:50", HTS_PARSE_ONE_COORD, "",  0, 49, 50);
93     reg_expected(hdr, "chr1:50-100", 0, "",  0, 49, 100);
94     reg_expected(hdr, "chr1:50-", 0, "",  0, 49, HTS_POS_MAX);
95     reg_expected(hdr, "chr1:-50", 0, "",  0, 0, 50);
96 
97     // Check quoting
98     fprintf(stderr, "Expected error: ");
99     reg_expected(hdr, "chr1:100-200", 0, NULL,  0, 0, 0); // ambiguous
100     reg_expected(hdr, "{chr1}:100-200", 0, "",  0, 99, 200);
101     reg_expected(hdr, "{chr1:100-200}", 0, "",  2, 0, HTS_POS_MAX);
102     reg_expected(hdr, "{chr1:100-200}:100-200", 0, "",  2, 99, 200);
103     reg_expected(hdr, "{chr2:100-200}:100-200", 0, "",  3, 99, 200);
104     reg_expected(hdr, "chr2:100-200:100-200", 0, "",  3, 99, 200);
105     reg_expected(hdr, "chr2:100-200", 0, "",  3, 0, HTS_POS_MAX);
106 
107     // Check numerics
108     reg_expected(hdr, "chr3", 0, "",  4, 0, HTS_POS_MAX);
109     reg_expected(hdr, "chr3:", 0, "",  4, 0, HTS_POS_MAX);
110     reg_expected(hdr, "chr3:1000-1500", 0, "",  4, 999, 1500);
111     reg_expected(hdr, "chr3:1,000-1,500", 0, "",  4, 999, 1500);
112     reg_expected(hdr, "chr3:1k-1.5K", 0, "",  4, 999, 1500);
113     reg_expected(hdr, "chr3:1e3-1.5e3", 0, "",  4, 999, 1500);
114     reg_expected(hdr, "chr3:1e3-15e2", 0, "",  4, 999, 1500);
115 
116     // Check list mode
117     reg_expected(hdr, "chr1,chr3", HTS_PARSE_LIST, "chr3", 0, 0, HTS_POS_MAX);
118     fprintf(stderr, "Expected error: ");
119     reg_expected(hdr, "chr1:100-200,chr3", HTS_PARSE_LIST, NULL,  0, 0, 0); // ambiguous
120     reg_expected(hdr, "{chr1,chr3}", HTS_PARSE_LIST, "", 5, 0, HTS_POS_MAX);
121     reg_expected(hdr, "{chr1,chr3},chr1", HTS_PARSE_LIST, "chr1", 5, 0, HTS_POS_MAX);
122     // incorrect usage; first reg is valid (but not what user expects).
123     reg_expected(hdr, "chr3:1,000-1,500", HTS_PARSE_LIST | HTS_PARSE_ONE_COORD, "000-1,500",  4, 0, 1);
124 
125     // More expected failures
126     reg_expected(hdr, "chr2", 0, NULL, 0, 0, 0);
127     reg_expected(hdr, "chr1,", 0, NULL, 0, 0, 0);
128     fprintf(stderr, "Expected error: ");
129     reg_expected(hdr, "{chr1", 0, NULL, 0, 0, 0);
130     reg_expected(hdr, "chr1:10-10", 0, "", 0, 9, 10); // OK
131     reg_expected(hdr, "chr1:10-9", 0, NULL, 0, 0, 0); // Issue#353
132     fprintf(stderr, "Expected error: ");
133     reg_expected(hdr, "chr1:x", 0, NULL, 0, 0, 0);
134     fprintf(stderr, "Expected error: ");
135     reg_expected(hdr, "chr1:1-y", 0, NULL, 0, 0, 0);
136     fprintf(stderr, "Expected error: ");
137     reg_expected(hdr, "chr1:1,chr3", 0, NULL, 0, 0, 0);
138 
139     sam_hdr_destroy(hdr);
140     sam_close(fp);
141 
142     exit(0);
143 }
144 
main(int argc,char ** argv)145 int main(int argc, char **argv) {
146     sam_hdr_t *hdr;
147     samFile *fp;
148     int flags = 0;
149 
150     while (argc > 1) {
151         if (strcmp(argv[1], "-m") == 0) {
152             flags |= HTS_PARSE_LIST;
153             argc--; argv++;
154             continue;
155         }
156 
157         if (strcmp(argv[1], "-c") == 0) {
158             flags |= HTS_PARSE_ONE_COORD;
159             argc--; argv++;
160             continue;
161         }
162 
163         // Automatic mode for test harness
164         if (strcmp(argv[1], "-t") == 0)
165             reg_test(argv[2]);
166 
167         break;
168     }
169 
170     // Interactive mode for debugging
171     if (argc != 3) {
172         fprintf(stderr, "Usage: test-parse-reg [-m] [-c] region[,region]...\n");
173         exit(1);
174     }
175 
176     if (!(fp = sam_open(argv[1], "r"))) {
177         perror(argv[1]);
178         exit(1);
179     }
180 
181     if (!(hdr = sam_hdr_read(fp))) {
182         fprintf(stderr, "Couldn't read header\n");
183         exit(1);
184     }
185 
186     const char *reg = argv[2];
187     while (*reg) {
188         int tid;
189         hts_pos_t beg, end;
190         reg = sam_parse_region(hdr, reg, &tid, &beg, &end, flags);
191         if (!reg) {
192             fprintf(stderr, "Failed to parse region\n");
193             exit(1);
194         }
195         printf("%-20s %12"PRIhts_pos" %12"PRIhts_pos"\n",
196                tid == -1 ? "*" : hdr->target_name[tid],
197                beg, end);
198     }
199 
200     sam_hdr_destroy(hdr);
201     sam_close(fp);
202 
203     return 0;
204 }
205