1 /* multipart.c -- GA4GH redirection and multipart backend for file streams.
2
3 Copyright (C) 2016-2017 Genome Research Ltd.
4
5 Author: John Marshall <jm18@sanger.ac.uk>
6
7 Permission is hereby granted, free of charge, to any person obtaining a copy
8 of this software and associated documentation files (the "Software"), to deal
9 in the Software without restriction, including without limitation the rights
10 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 copies of the Software, and to permit persons to whom the Software is
12 furnished to do so, subject to the following conditions:
13
14 The above copyright notice and this permission notice shall be included in
15 all copies or substantial portions of the Software.
16
17 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 DEALINGS IN THE SOFTWARE. */
24
25 #define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h
26 #include <config.h>
27
28 #include <stdio.h>
29 #include <string.h>
30 #include <errno.h>
31
32 #include "htslib/kstring.h"
33
34 #include "hts_internal.h"
35 #include "hfile_internal.h"
36
37 #ifndef EPROTO
38 #define EPROTO ENOEXEC
39 #endif
40
// One piece of a multipart stream: the location to fetch it from plus any
// extra header strings to pass along when opening that location.
typedef struct hfile_part {
    // Heap-allocated URL of this part; NULL until parsed and again once freed.
    char *url;

    // NULL-terminated array of heap-allocated header strings, or NULL when
    // the part has no headers.
    char **headers;
} hfile_part;
45
46 typedef struct {
47 hFILE base;
48 hfile_part *parts;
49 size_t nparts, maxparts, current;
50 hFILE *currentfp;
51 } hFILE_multipart;
52
// Releases everything owned by part P (its URL, each header string, and the
// header array itself) and leaves both fields set to NULL.  P itself is not
// freed.
static void free_part(hfile_part *p)
{
    char **headers = p->headers;

    free(p->url);
    p->url = NULL;

    if (headers != NULL) {
        size_t i;
        // The array is NULL-terminated, so walk until the sentinel.
        for (i = 0; headers[i] != NULL; i++)
            free(headers[i]);
        free(headers);
    }
    p->headers = NULL;
}
65
// Releases the contents of every part in use and then the parts array
// itself.  The fp->parts and fp->nparts fields are left unchanged.
static void free_all_parts(hFILE_multipart *fp)
{
    size_t idx = 0;

    while (idx < fp->nparts) {
        free_part(&fp->parts[idx]);
        idx++;
    }

    free(fp->parts);
}
72
// Backend read callback.  Fills BUFFER with up to NBYTES bytes taken from
// the parts in sequence: a part is opened only when it is first needed, and
// once it reports EOF it is released and closed and the next part is tried.
//
// Returns the value produced by reading the current part (a byte count, or
// an error from that stream), 0 once every part has been consumed, or -1 if
// a part could not be opened or closed.
static ssize_t multipart_read(hFILE *fpv, void *buffer, size_t nbytes)
{
    hFILE_multipart *fp = (hFILE_multipart *) fpv;
    // Signed, so that an error result from the underlying read is carried
    // through unchanged rather than round-tripping via an unsigned type.
    ssize_t n;

    for (;;) {
        if (fp->currentfp == NULL) {
            const hfile_part *p;

            // No more parts, so we're truly at EOF
            if (fp->current >= fp->nparts) return 0;

            p = &fp->parts[fp->current];
            hts_log_debug("Opening part #%zu of %zu: \"%.120s%s\"",
                          fp->current+1, fp->nparts, p->url,
                          (strlen(p->url) > 120)? "..." : "");

            // Parts are opened with auth_token_enabled set to "false";
            // per-part headers are passed along only when present.
            fp->currentfp = p->headers?
                  hopen(p->url, "r:",
                        "httphdr:v", p->headers,
                        "auth_token_enabled", "false", NULL)
                : hopen(p->url, "r:", "auth_token_enabled", "false", NULL);

            if (fp->currentfp == NULL) return -1;
        }

        n = fp->currentfp->mobile?
              fp->currentfp->backend->read(fp->currentfp, buffer, nbytes)
            : hread(fp->currentfp, buffer, nbytes);

        // Number of bytes read by (or an error from) fp->currentfp
        if (n != 0) return n;

        // We're at EOF on this part, so set up the next part.  The state is
        // advanced before closing so that a failed close does not leave a
        // dangling currentfp behind.
        {
            hFILE *prevfp = fp->currentfp;
            free_part(&fp->parts[fp->current]);
            fp->current++;
            fp->currentfp = NULL;
            if (hclose(prevfp) < 0) return -1;
        }
    }
}
113
// Backend write callback.  Writing is not supported by this backend: every
// call fails with errno set to EROFS.
static ssize_t multipart_write(hFILE *fpv, const void *buffer, size_t nbytes)
{
    // The arguments are deliberately unused.
    (void) fpv;
    (void) buffer;
    (void) nbytes;

    errno = EROFS;
    return -1;
}
119
// Backend seek callback.  Seeking is not supported by this backend: every
// call fails with errno set to ESPIPE.
static off_t multipart_seek(hFILE *fpv, off_t offset, int whence)
{
    // The arguments are deliberately unused.
    (void) fpv;
    (void) offset;
    (void) whence;

    errno = ESPIPE;
    return -1;
}
125
// Backend close callback.  Releases all remaining parts and, if a part is
// currently open, closes it.  Returns 0 on success or -1 if closing the
// open part fails.
static int multipart_close(hFILE *fpv)
{
    hFILE_multipart *mp = (hFILE_multipart *) fpv;
    hFILE *open_part = mp->currentfp;

    free_all_parts(mp);

    if (open_part == NULL) return 0;
    return (hclose(open_part) < 0)? -1 : 0;
}
137
138 static const struct hFILE_backend multipart_backend =
139 {
140 multipart_read, multipart_write, multipart_seek, NULL, multipart_close
141 };
142
143 // Returns 'v' (valid value), 'i' (invalid; required GA4GH field missing),
144 // or upon encountering an unexpected token, that token's type.
145 // Explicit `return '?'` means a JSON parsing error, typically a member key
146 // that is not a string. An unexpected token may be a valid token that was
147 // not the type expected for a particular GA4GH field, or it may be '?' or
148 // '\0' which should be propagated.
149 static char
parse_ga4gh_body_json(hFILE_multipart * fp,hFILE * json,kstring_t * b,kstring_t * header)150 parse_ga4gh_body_json(hFILE_multipart *fp, hFILE *json,
151 kstring_t *b, kstring_t *header)
152 {
153 hts_json_token t;
154
155 if (hts_json_fnext(json, &t, b) != '{') return t.type;
156 while (hts_json_fnext(json, &t, b) != '}') {
157 if (t.type != 's') return '?';
158
159 if (strcmp(t.str, "urls") == 0) {
160 if (hts_json_fnext(json, &t, b) != '[') return t.type;
161
162 while (hts_json_fnext(json, &t, b) != ']') {
163 hfile_part *part;
164 size_t n = 0, max = 0;
165
166 hts_expand(hfile_part, fp->nparts+1, fp->maxparts, fp->parts);
167 part = &fp->parts[fp->nparts++];
168 part->url = NULL;
169 part->headers = NULL;
170
171 if (t.type != '{') return t.type;
172 while (hts_json_fnext(json, &t, b) != '}') {
173 if (t.type != 's') return '?';
174
175 if (strcmp(t.str, "url") == 0) {
176 if (hts_json_fnext(json, &t, b) != 's') return t.type;
177 part->url = ks_release(b);
178 }
179 else if (strcmp(t.str, "headers") == 0) {
180 if (hts_json_fnext(json, &t, b) != '{') return t.type;
181
182 while (hts_json_fnext(json, &t, header) != '}') {
183 if (t.type != 's') return '?';
184
185 if (hts_json_fnext(json, &t, b) != 's')
186 return t.type;
187
188 kputs(": ", header);
189 kputs(t.str, header);
190 n++;
191 hts_expand(char *, n+1, max, part->headers);
192 part->headers[n-1] = ks_release(header);
193 part->headers[n] = NULL;
194 }
195 }
196 else if (hts_json_fskip_value(json, '\0') != 'v')
197 return '?';
198 }
199
200 if (! part->url) return 'i';
201 }
202 }
203 else if (strcmp(t.str, "format") == 0) {
204 if (hts_json_fnext(json, &t, b) != 's') return t.type;
205
206 hts_log_debug("GA4GH JSON redirection to multipart %s data", t.str);
207 }
208 else if (hts_json_fskip_value(json, '\0') != 'v') return '?';
209 }
210
211 return 'v';
212 }
213
214 // Returns 'v' (valid value), 'i' (invalid; required GA4GH field missing),
215 // or upon encountering an unexpected token, that token's type.
216 // Explicit `return '?'` means a JSON parsing error, typically a member key
217 // that is not a string. An unexpected token may be a valid token that was
218 // not the type expected for a particular GA4GH field, or it may be '?' or
219 // '\0' which should be propagated.
220 static char
parse_ga4gh_redirect_json(hFILE_multipart * fp,hFILE * json,kstring_t * b,kstring_t * header)221 parse_ga4gh_redirect_json(hFILE_multipart *fp, hFILE *json,
222 kstring_t *b, kstring_t *header) {
223 hts_json_token t;
224
225 if (hts_json_fnext(json, &t, b) != '{') return t.type;
226 while (hts_json_fnext(json, &t, b) != '}') {
227 if (t.type != 's') return '?';
228
229 if (strcmp(t.str, "htsget") == 0) {
230 char ret = parse_ga4gh_body_json(fp, json, b, header);
231 if (ret != 'v') return ret;
232 }
233 else return '?';
234 }
235
236 if (hts_json_fnext(json, &t, b) != '\0') return '?';
237
238 return 'v';
239 }
240
// Creates a multipart stream from the GA4GH redirection JSON read from
// HFILE.  MODE is passed through to hfile_init().
//
// Returns the new stream on success.  On failure returns NULL; when the
// failure is due to the JSON content, errno is set to EPROTO for a parsing
// error or premature end of input, and to EINVAL otherwise.
// HFILE is only read from; this function does not close it.
hFILE *hopen_htsget_redirect(hFILE *hfile, const char *mode)
{
    kstring_t scratch = { 0, 0, NULL }, hdr_scratch = { 0, 0, NULL };
    hFILE_multipart *mp =
        (hFILE_multipart *) hfile_init(sizeof (hFILE_multipart), mode, 0);
    char status;

    if (mp == NULL) return NULL;

    // Start with an empty parts array for the parser to grow.
    mp->parts = NULL;
    mp->nparts = 0;
    mp->maxparts = 0;

    status = parse_ga4gh_redirect_json(mp, hfile, &scratch, &hdr_scratch);
    free(scratch.s);
    free(hdr_scratch.s);

    if (status == 'v') {
        // Reading starts at the first part, which is opened on first read.
        mp->current = 0;
        mp->currentfp = NULL;
        mp->base.backend = &multipart_backend;
        return &mp->base;
    }

    // Discard whatever parts were collected before the failure.
    free_all_parts(mp);
    hfile_destroy((hFILE *) mp);

    switch (status) {
    case '?':
    case '\0':
        errno = EPROTO;
        break;
    default:
        errno = EINVAL;
        break;
    }
    return NULL;
}
268