1 /* multipart.c -- GA4GH redirection and multipart backend for file streams.
2
3 Copyright (C) 2016 Genome Research Ltd.
4
5 Author: John Marshall <jm18@sanger.ac.uk>
6
7 Permission is hereby granted, free of charge, to any person obtaining a copy
8 of this software and associated documentation files (the "Software"), to deal
9 in the Software without restriction, including without limitation the rights
10 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 copies of the Software, and to permit persons to whom the Software is
12 furnished to do so, subject to the following conditions:
13
14 The above copyright notice and this permission notice shall be included in
15 all copies or substantial portions of the Software.
16
17 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 DEALINGS IN THE SOFTWARE. */
24
25 #include <config.h>
26
27 #include <stdio.h>
28 #include <string.h>
29 #include <errno.h>
30
31 #include "htslib/kstring.h"
32
33 #include "hts_internal.h"
34 #include "hfile_internal.h"
35
36 #ifndef EPROTO
37 #define EPROTO ENOEXEC
38 #endif
39
/* One component of a multipart stream. */
struct hfile_part {
    char *url;       // Location of this part; heap-allocated, released with free()
    char **headers;  // NULL, or a NULL-terminated array of heap-allocated
                     // header strings that is handed to hopen() for this part
};
typedef struct hfile_part hfile_part;
44
45 typedef struct {
46 hFILE base;
47 hfile_part *parts;
48 size_t nparts, maxparts, current;
49 hFILE *currentfp;
50 } hFILE_multipart;
51
/* Releases the storage owned by one part: its URL string and, when present,
   every header string followed by the NULL-terminated header array itself.
   Both fields are left as NULL, so calling free_part() on the same part a
   second time is harmless. */
static void free_part(hfile_part *p)
{
    char **headers = p->headers;

    free(p->url);
    p->url = NULL;

    if (headers != NULL) {
        size_t i;
        for (i = 0; headers[i] != NULL; i++)
            free(headers[i]);
        free(headers);
    }
    p->headers = NULL;
}
64
/* Releases every part recorded in fp (the first fp->nparts entries of
   fp->parts) and then the parts array itself.  fp->parts and fp->nparts
   are not reset. */
static void free_all_parts(hFILE_multipart *fp)
{
    hfile_part *part = fp->parts;
    size_t remaining = fp->nparts;

    while (remaining > 0) {
        free_part(part);
        part++;
        remaining--;
    }

    free(fp->parts);
}
71
/* Backend read method.  Fills buffer with up to nbytes bytes taken from the
   parts in order: each part is opened only when it is first needed, and when
   the current part reports EOF it is closed, its storage is released, and
   reading continues with the next part.

   Returns the number of bytes read, 0 once every part has been consumed,
   or a negative value if opening, reading, or closing a part fails. */
static ssize_t multipart_read(hFILE *fpv, void *buffer, size_t nbytes)
{
    hFILE_multipart *fp = (hFILE_multipart *) fpv;
    // Signed, so a negative error result from the underlying read is carried
    // through unchanged instead of being round-tripped via an unsigned type.
    ssize_t n;

open_next:
    if (fp->currentfp == NULL) {
        if (fp->current < fp->nparts) {
            const hfile_part *p = &fp->parts[fp->current];
            if (hts_verbose >= 5)
                fprintf(stderr, "[M::multipart_read] opening part #%zu of %zu:"
                        " \"%.120s%s\"\n", fp->current+1, fp->nparts, p->url,
                        (strlen(p->url) > 120)? "..." : "");

            // Parts that came with headers pass them on via "httphdr:v"
            fp->currentfp = p->headers?
                  hopen(p->url, "r:", "httphdr:v", p->headers, NULL)
                : hopen(p->url, "r");

            if (fp->currentfp == NULL) return -1;
        }
        else return 0;  // No more parts, so we're truly at EOF
    }

    // Streams flagged as mobile are read through their backend directly;
    // all others go through hread()
    n = fp->currentfp->mobile?
          fp->currentfp->backend->read(fp->currentfp, buffer, nbytes)
        : hread(fp->currentfp, buffer, nbytes);

    if (n == 0) {
        // We're at EOF on this part, so set up the next part.  The state is
        // advanced before hclose() so a close failure leaves no stale stream.
        hFILE *prevfp = fp->currentfp;
        free_part(&fp->parts[fp->current]);
        fp->current++;
        fp->currentfp = NULL;
        if (hclose(prevfp) < 0) return -1;
        goto open_next;
    }

    return n;  // Number of bytes read by (or an error from) fp->currentfp
}
111
/* Backend write method.  Writing is not supported: every call fails with
   errno set to EROFS and returns -1; the arguments are not examined. */
static ssize_t multipart_write(hFILE *fpv, const void *buffer, size_t nbytes)
{
    (void) fpv;
    (void) buffer;
    (void) nbytes;

    errno = EROFS;
    return -1;
}
117
/* Backend seek method.  Seeking is not supported: every call fails with
   errno set to ESPIPE and returns -1; the arguments are not examined. */
static off_t multipart_seek(hFILE *fpv, off_t offset, int whence)
{
    (void) fpv;
    (void) offset;
    (void) whence;

    errno = ESPIPE;
    return -1;
}
123
/* Backend close method.  Releases every part still recorded for the stream
   and then closes the part that is currently open, if any.
   Returns 0 on success, or -1 if closing the open part fails. */
static int multipart_close(hFILE *fpv)
{
    hFILE_multipart *mp = (hFILE_multipart *) fpv;

    free_all_parts(mp);

    if (mp->currentfp != NULL && hclose(mp->currentfp) < 0)
        return -1;

    return 0;
}
135
136 static const struct hFILE_backend multipart_backend =
137 {
138 multipart_read, multipart_write, multipart_seek, NULL, multipart_close
139 };
140
141 // Returns 'v' (valid value), 'i' (invalid; required GA4GH field missing),
142 // or upon encountering an unexpected token, that token's type.
143 // Explicit `return '?'` means a JSON parsing error, typically a member key
144 // that is not a string. An unexpected token may be a valid token that was
145 // not the type expected for a particular GA4GH field, or it may be '?' or
146 // '\0' which should be propagated.
147 static char
parse_ga4gh_redirect_json(hFILE_multipart * fp,hFILE * json,kstring_t * b,kstring_t * header)148 parse_ga4gh_redirect_json(hFILE_multipart *fp, hFILE *json,
149 kstring_t *b, kstring_t *header)
150 {
151 hts_json_token t;
152
153 if (hts_json_fnext(json, &t, b) != '{') return t.type;
154 while (hts_json_fnext(json, &t, b) != '}') {
155 if (t.type != 's') return '?';
156
157 if (strcmp(t.str, "urls") == 0) {
158 if (hts_json_fnext(json, &t, b) != '[') return t.type;
159
160 while (hts_json_fnext(json, &t, b) != ']') {
161 hfile_part *part;
162 size_t n = 0, max = 0;
163
164 hts_expand(hfile_part, fp->nparts+1, fp->maxparts, fp->parts);
165 part = &fp->parts[fp->nparts++];
166 part->url = NULL;
167 part->headers = NULL;
168
169 if (t.type != '{') return t.type;
170 while (hts_json_fnext(json, &t, b) != '}') {
171 if (t.type != 's') return '?';
172
173 if (strcmp(t.str, "url") == 0) {
174 if (hts_json_fnext(json, &t, b) != 's') return t.type;
175 part->url = ks_release(b);
176 }
177 else if (strcmp(t.str, "headers") == 0) {
178 if (hts_json_fnext(json, &t, b) != '{') return t.type;
179
180 while (hts_json_fnext(json, &t, header) != '}') {
181 if (t.type != 's') return '?';
182
183 if (hts_json_fnext(json, &t, b) != 's')
184 return t.type;
185
186 kputs(": ", header);
187 kputs(t.str, header);
188 n++;
189 hts_expand(char *, n+1, max, part->headers);
190 part->headers[n-1] = ks_release(header);
191 part->headers[n] = NULL;
192 }
193 }
194 else if (hts_json_fskip_value(json, '\0') != 'v')
195 return '?';
196 }
197
198 if (! part->url) return 'i';
199 }
200 }
201 else if (strcmp(t.str, "format") == 0) {
202 if (hts_json_fnext(json, &t, b) != 's') return t.type;
203
204 if (hts_verbose >= 5)
205 fprintf(stderr, "[M::multipart_open] GA4GH JSON redirection "
206 "to multipart %s data\n", t.str);
207 }
208 else if (hts_json_fskip_value(json, '\0') != 'v') return '?';
209 }
210
211 if (hts_json_fnext(json, &t, b) != '\0') return '?';
212
213 return 'v';
214 }
215
/* Creates a multipart stream from the GA4GH redirection JSON read from hfile.
   The JSON is parsed immediately; the parts it lists are opened later, as
   the returned stream is read.

   Returns the new stream, or NULL on failure.  When parsing fails, errno is
   set to EPROTO for malformed or truncated JSON (parser result '?' or '\0')
   and to EINVAL for any other parser result. */
hFILE *hopen_json_redirect(hFILE *hfile, const char *mode)
{
    kstring_t token_buf = { 0, 0, NULL };
    kstring_t header_buf = { 0, 0, NULL };
    hFILE_multipart *mp;
    char status;

    mp = (hFILE_multipart *) hfile_init(sizeof (hFILE_multipart), mode, 0);
    if (mp == NULL) return NULL;

    mp->parts = NULL;
    mp->nparts = 0;
    mp->maxparts = 0;

    status = parse_ga4gh_redirect_json(mp, hfile, &token_buf, &header_buf);

    // The scratch buffers are only needed while parsing
    free(token_buf.s);
    free(header_buf.s);

    if (status == 'v') {
        mp->current = 0;
        mp->currentfp = NULL;
        mp->base.backend = &multipart_backend;
        return &mp->base;
    }

    // Parsing failed: discard any parts gathered so far, then the stream
    free_all_parts(mp);
    hfile_destroy((hFILE *) mp);

    if (status == '?' || status == '\0')
        errno = EPROTO;
    else
        errno = EINVAL;

    return NULL;
}
243