1 /*  multipart.c -- GA4GH redirection and multipart backend for file streams.
2 
3     Copyright (C) 2016 Genome Research Ltd.
4 
5     Author: John Marshall <jm18@sanger.ac.uk>
6 
7 Permission is hereby granted, free of charge, to any person obtaining a copy
8 of this software and associated documentation files (the "Software"), to deal
9 in the Software without restriction, including without limitation the rights
10 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 copies of the Software, and to permit persons to whom the Software is
12 furnished to do so, subject to the following conditions:
13 
14 The above copyright notice and this permission notice shall be included in
15 all copies or substantial portions of the Software.
16 
17 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 DEALINGS IN THE SOFTWARE.  */
24 
25 #include <config.h>
26 
27 #include <stdio.h>
28 #include <string.h>
29 #include <errno.h>
30 
31 #include "htslib/kstring.h"
32 
33 #include "hts_internal.h"
34 #include "hfile_internal.h"
35 
36 #ifndef EPROTO
37 #define EPROTO ENOEXEC
38 #endif
39 
/* One entry of the redirect's "urls" array: where to fetch the part from,
   plus any extra request headers to send along.  Both members are heap
   allocated and are released by free_part(). */
struct hfile_part {
    char *url;       // Location of this part; NULL until parsed or once freed
    char **headers;  // NULL-terminated array of "Name: value" strings, or NULL
};
typedef struct hfile_part hfile_part;
44 
45 typedef struct {
46     hFILE base;
47     hfile_part *parts;
48     size_t nparts, maxparts, current;
49     hFILE *currentfp;
50 } hFILE_multipart;
51 
/* Release the strings owned by a single part and reset its members to NULL,
   so that calling this again on the same part has nothing left to free.
   The hfile_part structure itself is not freed. */
static void free_part(hfile_part *p)
{
    char **headers = p->headers;

    free(p->url);
    p->url = NULL;

    if (headers != NULL) {
        // The array is NULL-terminated: free each string, then the array
        size_t i = 0;
        while (headers[i] != NULL) free(headers[i++]);
        free(headers);
    }
    p->headers = NULL;
}
64 
/* Release every part held by fp, followed by the parts array itself.
   fp->parts and fp->nparts are left as they were, so fp must not be used
   to reach the parts afterwards. */
static void free_all_parts(hFILE_multipart *fp)
{
    hfile_part *part = fp->parts;
    size_t remaining = fp->nparts;

    for (; remaining > 0; remaining--, part++) free_part(part);
    free(fp->parts);
}
71 
/* Backend read callback: read up to nbytes bytes into buffer from the
   current part, moving on to the following part whenever the current one
   reaches EOF.  Parts are opened lazily, the first time data is wanted
   from them, and each part's description is freed once it has been fully
   consumed.

   Returns the number of bytes read, 0 once every part has been consumed,
   or a negative value if opening, reading, or closing a part fails. */
static ssize_t multipart_read(hFILE *fpv, void *buffer, size_t nbytes)
{
    hFILE_multipart *fp = (hFILE_multipart *) fpv;
    ssize_t n;  // Signed, so an error from the underlying read stays negative

open_next:
    if (fp->currentfp == NULL) {
        if (fp->current < fp->nparts) {
            const hfile_part *p = &fp->parts[fp->current];
            if (hts_verbose >= 5)
                fprintf(stderr, "[M::multipart_read] opening part #%zu of %zu:"
                        " \"%.120s%s\"\n", fp->current+1, fp->nparts, p->url,
                        (strlen(p->url) > 120)? "..." : "");

            // Pass the part's extra headers along only when it has some
            fp->currentfp = p->headers?
                  hopen(p->url, "r:", "httphdr:v", p->headers, NULL)
                : hopen(p->url, "r");

            if (fp->currentfp == NULL) return -1;
        }
        else return 0;  // No more parts, so we're truly at EOF
    }

    // Call the part's backend directly when its mobile flag is set;
    // otherwise read through hread()
    n = fp->currentfp->mobile?
          fp->currentfp->backend->read(fp->currentfp, buffer, nbytes)
        : hread(fp->currentfp, buffer, nbytes);

    if (n == 0) {
        // We're at EOF on this part, so set up the next part.  The state is
        // advanced before hclose() so that it stays consistent even if
        // closing the finished part fails.
        hFILE *prevfp = fp->currentfp;
        free_part(&fp->parts[fp->current]);
        fp->current++;
        fp->currentfp = NULL;
        if (hclose(prevfp) < 0) return -1;
        goto open_next;
    }

    return n;  // Number of bytes read by (or an error from) fp->currentfp
}
111 
/* Backend write callback.  Multipart streams cannot be written to: every
   call fails with -1 and errno set to EROFS.  None of the arguments are
   examined. */
static ssize_t multipart_write(hFILE *fpv, const void *buffer, size_t nbytes)
{
    (void) fpv;
    (void) buffer;
    (void) nbytes;

    errno = EROFS;
    return -1;
}
117 
/* Backend seek callback.  Multipart streams cannot be repositioned: every
   call fails with -1 and errno set to ESPIPE.  None of the arguments are
   examined. */
static off_t multipart_seek(hFILE *fpv, off_t offset, int whence)
{
    (void) fpv;
    (void) offset;
    (void) whence;

    errno = ESPIPE;
    return -1;
}
123 
/* Backend close callback: free all part descriptions, then close the part
   that is currently open, if there is one.  Returns 0 on success, or -1 if
   closing the open part fails. */
static int multipart_close(hFILE *fpv)
{
    hFILE_multipart *fp = (hFILE_multipart *) fpv;
    hFILE *openfp = fp->currentfp;

    free_all_parts(fp);

    if (openfp == NULL) return 0;
    return (hclose(openfp) < 0)? -1 : 0;
}
135 
136 static const struct hFILE_backend multipart_backend =
137 {
138     multipart_read, multipart_write, multipart_seek, NULL, multipart_close
139 };
140 
141 // Returns 'v' (valid value), 'i' (invalid; required GA4GH field missing),
142 // or upon encountering an unexpected token, that token's type.
143 // Explicit `return '?'` means a JSON parsing error, typically a member key
144 // that is not a string.  An unexpected token may be a valid token that was
145 // not the type expected for a particular GA4GH field, or it may be '?' or
146 // '\0' which should be propagated.
147 static char
parse_ga4gh_redirect_json(hFILE_multipart * fp,hFILE * json,kstring_t * b,kstring_t * header)148 parse_ga4gh_redirect_json(hFILE_multipart *fp, hFILE *json,
149                           kstring_t *b, kstring_t *header)
150 {
151     hts_json_token t;
152 
153     if (hts_json_fnext(json, &t, b) != '{') return t.type;
154     while (hts_json_fnext(json, &t, b) != '}') {
155         if (t.type != 's') return '?';
156 
157         if (strcmp(t.str, "urls") == 0) {
158             if (hts_json_fnext(json, &t, b) != '[') return t.type;
159 
160             while (hts_json_fnext(json, &t, b) != ']') {
161                 hfile_part *part;
162                 size_t n = 0, max = 0;
163 
164                 hts_expand(hfile_part, fp->nparts+1, fp->maxparts, fp->parts);
165                 part = &fp->parts[fp->nparts++];
166                 part->url = NULL;
167                 part->headers = NULL;
168 
169                 if (t.type != '{') return t.type;
170                 while (hts_json_fnext(json, &t, b) != '}') {
171                     if (t.type != 's') return '?';
172 
173                     if (strcmp(t.str, "url") == 0) {
174                         if (hts_json_fnext(json, &t, b) != 's') return t.type;
175                         part->url = ks_release(b);
176                     }
177                     else if (strcmp(t.str, "headers") == 0) {
178                         if (hts_json_fnext(json, &t, b) != '{') return t.type;
179 
180                         while (hts_json_fnext(json, &t, header) != '}') {
181                             if (t.type != 's') return '?';
182 
183                             if (hts_json_fnext(json, &t, b) != 's')
184                                 return t.type;
185 
186                             kputs(": ", header);
187                             kputs(t.str, header);
188                             n++;
189                             hts_expand(char *, n+1, max, part->headers);
190                             part->headers[n-1] = ks_release(header);
191                             part->headers[n] = NULL;
192                         }
193                     }
194                     else if (hts_json_fskip_value(json, '\0') != 'v')
195                         return '?';
196                 }
197 
198                 if (! part->url) return 'i';
199             }
200         }
201         else if (strcmp(t.str, "format") == 0) {
202             if (hts_json_fnext(json, &t, b) != 's') return t.type;
203 
204             if (hts_verbose >= 5)
205                 fprintf(stderr, "[M::multipart_open] GA4GH JSON redirection "
206                         "to multipart %s data\n", t.str);
207         }
208         else if (hts_json_fskip_value(json, '\0') != 'v') return '?';
209     }
210 
211     if (hts_json_fnext(json, &t, b) != '\0') return '?';
212 
213     return 'v';
214 }
215 
/* Create a multipart stream from the GA4GH redirection JSON read from
   hfile.  mode is passed on to hfile_init().  hfile itself is only read
   from here; it is not closed by this function.

   Returns the new stream, or NULL on failure.  When the JSON cannot be
   used, errno is set to EPROTO if the parser reported '?' or '\0', and to
   EINVAL for any other non-'v' result. */
hFILE *hopen_json_redirect(hFILE *hfile, const char *mode)
{
    kstring_t scratch = { 0, 0, NULL }, hdrbuf = { 0, 0, NULL };
    hFILE_multipart *fp;
    char status;

    fp = (hFILE_multipart *) hfile_init(sizeof (hFILE_multipart), mode, 0);
    if (fp == NULL) return NULL;

    // Start with no parts and nothing open
    fp->parts = NULL;
    fp->nparts = 0;
    fp->maxparts = 0;
    fp->current = 0;
    fp->currentfp = NULL;

    status = parse_ga4gh_redirect_json(fp, hfile, &scratch, &hdrbuf);
    free(scratch.s);
    free(hdrbuf.s);

    if (status == 'v') {
        fp->base.backend = &multipart_backend;
        return &fp->base;
    }

    // Parsing failed: discard any parts collected before the error.
    // errno is assigned last so the cleanup calls cannot overwrite it.
    free_all_parts(fp);
    hfile_destroy((hFILE *) fp);
    if (status == '?' || status == '\0') errno = EPROTO;
    else errno = EINVAL;
    return NULL;
}
243