1 /*  hfile_libcurl.c -- libcurl backend for low-level file streams.
2 
3     Copyright (C) 2015-2017 Genome Research Ltd.
4 
5     Author: John Marshall <jm18@sanger.ac.uk>
6 
7 Permission is hereby granted, free of charge, to any person obtaining a copy
8 of this software and associated documentation files (the "Software"), to deal
9 in the Software without restriction, including without limitation the rights
10 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 copies of the Software, and to permit persons to whom the Software is
12 furnished to do so, subject to the following conditions:
13 
14 The above copyright notice and this permission notice shall be included in
15 all copies or substantial portions of the Software.
16 
17 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 DEALINGS IN THE SOFTWARE.  */
24 
25 #include <config.h>
26 
27 #include <stdarg.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <errno.h>
31 #include <pthread.h>
32 #ifndef _WIN32
33 # include <sys/select.h>
34 #endif
35 #include <assert.h>
36 
37 #include "hfile_internal.h"
38 #ifdef ENABLE_PLUGINS
39 #include "version.h"
40 #endif
41 #include "htslib/hts.h"  // for hts_version() and hts_verbose
42 #include "htslib/kstring.h"
43 #include "htslib/khash.h"
44 
45 #include <curl/curl.h>
46 
// Number of seconds to take off auth_token expiry, to allow for clock skew
// and slow servers.  A token is re-read once the current time plus this
// margin reaches its recorded expiry time.
#define AUTH_REFRESH_EARLY_SECS 60
50 
// Minimum number of bytes to skip when seeking forward.  Forward seeks
// shorter than this just read the intervening data and throw it away
// instead of restarting the transfer.  The optimal value depends on how
// long it takes to make a new connection compared to how fast the data
// arrives.
#define MIN_SEEK_FORWARD 1000000
56 
// A cached authorization token and the location it is loaded from
typedef struct {
    char *path;             // Location of the token; opened with hopen()
    char *token;            // Complete "Authorization: Bearer ..." header
                            // line, or NULL if there is no token
    time_t expiry;          // Time at which the token should be re-read;
                            // 0 means it is never refreshed
    int failed;             // Set after a failed refresh; blocks retries
    pthread_mutex_t lock;   // Held while the token is renewed or copied
} auth_token;
64 
// For the authorization header cache: khash map type "auth_map" from a
// string key (the token's path) to the auth_token describing it
KHASH_MAP_INIT_STR(auth_map, auth_token *)
67 
// Curl-compatible header linked list.  The nodes live in one growable
// array, with each node's next pointer aimed at the following element, so
// &list[0] can be handed to libcurl as an ordinary curl_slist.
typedef struct {
    struct curl_slist *list;   // Array of list nodes
    unsigned int num;          // Number of nodes currently in use
    unsigned int size;         // Number of nodes allocated
} hdrlist;
74 
// All of the request headers associated with one handle.  When both lists
// are non-empty the last node of "fixed" is linked to the first node of
// "extra" so that they form a single chain.
typedef struct {
    hdrlist fixed;                   // List of headers supplied at hopen()
    hdrlist extra;                   // List of headers from callback
    hts_httphdr_callback callback;   // Callback to get more headers
    void *callback_data;             // Data to pass to callback
    auth_token *auth;                // Authentication token
    int auth_hdr_num;                // Location of auth_token in hdrlist extra
                                     // (1-based; 0 means none has been added)
                                     // If -1, Authorization header is in fixed
                                     //    -2, it came from the callback
                                     //    -3, "auth_token_enabled", "false"
                                     //        passed to hopen()
} http_headers;
87 
// hFILE backend state for one libcurl transfer
typedef struct {
    hFILE base;
    CURL *easy;
    CURLM *multi;
    off_t file_size;        // Negative when the size is not known
    // The caller's buffer that the libcurl callbacks are currently filling
    // (rd) or draining (wr), and the number of bytes still available in it
    struct {
        union { char *rd; const char *wr; } ptr;
        size_t len;
    } buffer;
    CURLcode final_result;  // easy result code for finished transfers
    // Flags for communicating with libcurl callbacks:
    unsigned paused : 1;    // callback tells us that it has paused transfer
    unsigned closing : 1;   // informs callback that hclose() has been invoked
    unsigned finished : 1;  // wait_perform() tells us transfer is complete
    unsigned perform_again : 1; // curl_multi_perform() asked to be re-called
    unsigned is_read : 1;   // Opened in read mode
    unsigned can_seek : 1;  // Can (attempt to) seek on this handle
    unsigned is_recursive:1; // Opened by hfile_libcurl itself
    unsigned tried_seek : 1; // At least one seek has been attempted
    int nrunning;            // Compared against the running-handle count
                             // reported by curl_multi_perform()
    http_headers headers;
    off_t delayed_seek;      // Location to seek to before reading
    off_t last_offset;       // Location we're seeking from
} hFILE_libcurl;
112 
// Forward declarations for functions defined further down the file;
// restart_from_position() is needed by libcurl_read() before its definition.
static off_t libcurl_seek(hFILE *fpv, off_t offset, int whence);
static int restart_from_position(hFILE_libcurl *fp, off_t pos);
115 
/*
 * Translate an HTTP response status into the closest matching errno value.
 * Statuses below 400 are not errors and give 0.  Unlisted 4xx codes give
 * EINVAL and unlisted codes of 500 or above give EIO.
 */
static int http_status_errno(int status)
{
    // Statuses that have a dedicated translation
    switch (status) {
    case 401:
    case 407:
        return EPERM;
    case 403:
        return EACCES;
    case 404:
    case 410:
        return ENOENT;
    case 405:
        return EROFS;
    case 408:
    case 504:
        return ETIMEDOUT;
    case 501:
        return ENOSYS;
    case 503:
        return EBUSY;
    default:
        break;
    }

    // Everything else is classified by range alone
    if (status >= 500) return EIO;
    if (status >= 400) return EINVAL;
    return 0;
}
138 
/*
 * Translate a libcurl easy-interface result code into an errno value.
 *
 * easy is only consulted for the codes that need more detail: the OS-level
 * errno for connection/send/receive failures, and the HTTP status for
 * CURLE_HTTP_RETURNED_ERROR.  Returns 0 for CURLE_OK and EIO for any code
 * without a specific translation.
 */
static int easy_errno(CURL *easy, CURLcode err)
{
    long lval;

    switch (err) {
    case CURLE_OK:
        return 0;

    case CURLE_UNSUPPORTED_PROTOCOL:
    case CURLE_URL_MALFORMAT:
        return EINVAL;

#if LIBCURL_VERSION_NUM >= 0x071505
    case CURLE_NOT_BUILT_IN:
        return ENOSYS;
#endif

    case CURLE_COULDNT_RESOLVE_PROXY:
    case CURLE_COULDNT_RESOLVE_HOST:
    case CURLE_FTP_CANT_GET_HOST:
        return EDESTADDRREQ; // Lookup failure

    case CURLE_COULDNT_CONNECT:
    case CURLE_SEND_ERROR:
    case CURLE_RECV_ERROR:
        // libcurl may not have recorded an OS error for this failure, in
        // which case the value is 0.  Never report a failure as errno 0.
        lval = 0;
        if (curl_easy_getinfo(easy, CURLINFO_OS_ERRNO, &lval) == CURLE_OK
            && lval != 0)
            return (int) lval;
        else
            return ECONNABORTED;

    case CURLE_REMOTE_ACCESS_DENIED:
    case CURLE_LOGIN_DENIED:
    case CURLE_TFTP_PERM:
        return EACCES;

    case CURLE_PARTIAL_FILE:
        return EPIPE;

    case CURLE_HTTP_RETURNED_ERROR:
        if (curl_easy_getinfo(easy, CURLINFO_RESPONSE_CODE, &lval) == CURLE_OK)
            return http_status_errno(lval);
        else
            return EIO;

    case CURLE_OUT_OF_MEMORY:
        return ENOMEM;

    case CURLE_OPERATION_TIMEDOUT:
        return ETIMEDOUT;

    case CURLE_RANGE_ERROR:
        return ESPIPE;

    case CURLE_SSL_CONNECT_ERROR:
        // TODO return SSL error buffer messages
        return ECONNABORTED;

    case CURLE_FILE_COULDNT_READ_FILE:
    case CURLE_TFTP_NOTFOUND:
        return ENOENT;

    case CURLE_TOO_MANY_REDIRECTS:
        return ELOOP;

    case CURLE_FILESIZE_EXCEEDED:
        return EFBIG;

    case CURLE_REMOTE_DISK_FULL:
        return ENOSPC;

    case CURLE_REMOTE_FILE_EXISTS:
        return EEXIST;

    default:
        return EIO;
    }
}
216 
/*
 * Translate a libcurl multi-interface result code into an errno value.
 * CURLM_OK and CURLM_CALL_MULTI_PERFORM are both treated as success (0).
 */
static int multi_errno(CURLMcode errm)
{
    if (errm == CURLM_OK || errm == CURLM_CALL_MULTI_PERFORM)
        return 0;

    if (errm == CURLM_OUT_OF_MEMORY)
        return ENOMEM;

    if (errm == CURLM_BAD_HANDLE
        || errm == CURLM_BAD_EASY_HANDLE
        || errm == CURLM_BAD_SOCKET)
        return EBADF;

    // No closer match for anything else
    return EIO;
}
236 
237 static struct {
238     kstring_t useragent;
239     CURLSH *share;
240     char *auth_path;
241     khash_t(auth_map) *auth_map;
242     int allow_unencrypted_auth_header;
243     pthread_mutex_t auth_lock;
244     pthread_mutex_t share_lock;
245 } curl = { { 0, 0, NULL }, NULL, NULL, NULL, 0, PTHREAD_MUTEX_INITIALIZER,
246            PTHREAD_MUTEX_INITIALIZER };
247 
// Lock callback with the libcurl share-handle signature.  One mutex is used
// whatever data item or access mode is requested, so no argument is examined.
static void share_lock(CURL *handle, curl_lock_data data,
                       curl_lock_access access, void *userptr) {
    (void) handle;
    (void) data;
    (void) access;
    (void) userptr;
    pthread_mutex_lock(&curl.share_lock);
}
252 
// Unlock callback matching share_lock(); releases the single shared mutex.
static void share_unlock(CURL *handle, curl_lock_data data, void *userptr) {
    (void) handle;
    (void) data;
    (void) userptr;
    pthread_mutex_unlock(&curl.share_lock);
}
256 
// Destroy an auth_token along with the strings it owns.  NULL is accepted
// and ignored.  Aborts the program if the token's mutex cannot be destroyed.
static void free_auth(auth_token *tok) {
    if (tok != NULL) {
        int rc = pthread_mutex_destroy(&tok->lock);
        if (rc != 0)
            abort();

        free(tok->path);
        free(tok->token);
        free(tok);
    }
}
264 
libcurl_exit()265 static void libcurl_exit()
266 {
267     if (curl_share_cleanup(curl.share) == CURLSHE_OK)
268         curl.share = NULL;
269 
270     free(curl.useragent.s);
271     curl.useragent.l = curl.useragent.m = 0; curl.useragent.s = NULL;
272 
273     free(curl.auth_path);
274     curl.auth_path = NULL;
275 
276     if (curl.auth_map) {
277         khiter_t i;
278         for (i = kh_begin(curl.auth_map); i != kh_end(curl.auth_map); ++i) {
279             if (kh_exist(curl.auth_map, i)) {
280                 free_auth(kh_value(curl.auth_map, i));
281                 kh_key(curl.auth_map, i) = NULL;
282                 kh_value(curl.auth_map, i) = NULL;
283             }
284         }
285         kh_destroy(auth_map, curl.auth_map);
286         curl.auth_map = NULL;
287     }
288 
289     curl_global_cleanup();
290 }
291 
/*
 * Add one header line to the end of hdrs.
 *
 * If dup is non-zero the string is copied with strdup(); otherwise the list
 * stores the caller's pointer directly (and free_headers() will later
 * free() it).  The node array grows by doubling, starting at 4 entries.
 * Growing may move the array, so any pointer into hdrs->list held elsewhere
 * has to be refreshed by the caller.
 *
 * Returns 0 on success, -1 if memory could not be allocated or data is NULL.
 */
static int append_header(hdrlist *hdrs, const char *data, int dup) {
    struct curl_slist *slot;

    if (hdrs->num == hdrs->size) {
        unsigned int grown = (hdrs->size != 0) ? hdrs->size * 2 : 4;
        unsigned int k;
        struct curl_slist *moved = realloc(hdrs->list,
                                           grown * sizeof(*moved));
        if (moved == NULL)
            return -1;

        hdrs->list = moved;
        hdrs->size = grown;
        // The array may have moved, so rebuild the chain of next pointers
        for (k = 0; k + 1 < hdrs->num; k++)
            moved[k].next = &moved[k + 1];
    }

    slot = &hdrs->list[hdrs->num];
    // Annoyingly, libcurl doesn't declare the char * as const...
    slot->data = dup ? strdup(data) : (char *) data;
    if (slot->data == NULL)
        return -1;

    slot->next = NULL;
    if (hdrs->num > 0)
        hdrs->list[hdrs->num - 1].next = slot;
    hdrs->num++;
    return 0;
}
310 
/*
 * Free every header string held in hdrs and mark the list as empty.
 * If completely is non-zero the node array itself is released as well;
 * otherwise it is kept so the list can be refilled without reallocating.
 */
static void free_headers(hdrlist *hdrs, int completely) {
    struct curl_slist *node = hdrs->list;
    unsigned int left = hdrs->num;

    while (left > 0) {
        free(node->data);
        node->data = NULL;
        node->next = NULL;
        node++;
        left--;
    }
    hdrs->num = 0;

    if (!completely)
        return;

    free(hdrs->list);
    hdrs->list = NULL;
    hdrs->size = 0;
}
325 
// Return the head of the header chain for fp: the first fixed header if
// there is one, otherwise the first extra header, otherwise NULL.
static struct curl_slist * get_header_list(hFILE_libcurl *fp) {
    hdrlist *fixed = &fp->headers.fixed;
    hdrlist *extra = &fp->headers.extra;

    if (fixed->num > 0)
        return &fixed->list[0];

    return (extra->num > 0) ? &extra->list[0] : 0;
}
333 
// Returns 1 if hdr starts with "authorization:" (compared without regard
// to case), 0 otherwise.
static inline int is_authorization(const char *hdr) {
    static const char prefix[] = "authorization:";
    const size_t prefix_len = sizeof(prefix) - 1;

    return strncasecmp(prefix, hdr, prefix_len) == 0;
}
337 
/*
 * Ask the header callback (if any) for a fresh set of headers and install
 * them as fp->headers.extra, replacing whatever was there before.
 *
 * The strings returned by the callback are adopted by the extra list
 * without copying, and the corresponding entries in the callback's array
 * are set to NULL.
 *
 * Returns 0 on success (including "no callback" and "callback reported no
 * change"), -1 if the callback failed or memory ran out.  On a memory
 * failure the strings that could not be adopted are freed here.
 */
static int add_callback_headers(hFILE_libcurl *fp) {
    char **hdrs = NULL, **hdr, **adopted;

    if (!fp->headers.callback)
        return 0;

    // Get the headers from the callback
    if (fp->headers.callback(fp->headers.callback_data, &hdrs) != 0) {
        return -1;
    }

    if (!hdrs) // No change
        return 0;

    // Remove any old callback headers
    if (fp->headers.fixed.num > 0) {
        // Unlink lists
        fp->headers.fixed.list[fp->headers.fixed.num - 1].next = NULL;
    }
    free_headers(&fp->headers.extra, 0);

    if (fp->headers.auth_hdr_num > 0 || fp->headers.auth_hdr_num == -2)
        fp->headers.auth_hdr_num = 0; // Just removed it...

    // Convert to libcurl-suitable form
    for (hdr = hdrs; *hdr; hdr++) {
        if (append_header(&fp->headers.extra, *hdr, 0) < 0) {
            goto cleanup;
        }
        if (is_authorization(*hdr) && !fp->headers.auth_hdr_num)
            fp->headers.auth_hdr_num = -2;
    }
    // Ownership of every string has passed to headers.extra
    for (hdr = hdrs; *hdr; hdr++) *hdr = NULL;

    if (fp->headers.fixed.num > 0 && fp->headers.extra.num > 0) {
        // Relink lists
        fp->headers.fixed.list[fp->headers.fixed.num - 1].next
            = &fp->headers.extra.list[0];
    }
    return 0;

 cleanup:
    // Entries before hdr were adopted by headers.extra, so just forget them
    // here to keep each string with exactly one owner.
    for (adopted = hdrs; adopted < hdr; adopted++) *adopted = NULL;

    // The entry that failed and all that follow it were never adopted.
    // Advance through the array so that each one is released.
    for (; *hdr; hdr++) {
        free(*hdr);
        *hdr = NULL;
    }
    return -1;
}
386 
387 /*
388  * Read an OAUTH2-style Bearer access token (see
389  * https://tools.ietf.org/html/rfc6750#section-4).
390  * Returns 'v' for valid; 'i' for invalid (token missing or wrong sort);
391  * '?' for a JSON parse error; 'm' if it runs out of memory.
392  */
read_auth_json(auth_token * tok,hFILE * auth_fp)393 static int read_auth_json(auth_token *tok, hFILE *auth_fp) {
394     hts_json_token *t = hts_json_alloc_token();
395     kstring_t str = {0, 0, NULL};
396     char *token = NULL, *type = NULL, *expiry = NULL;
397     int ret = 'i';
398 
399     if (!t) goto error;
400 
401     if ((ret = hts_json_fnext(auth_fp, t, &str)) != '{') goto error;
402     while (hts_json_fnext(auth_fp, t, &str) != '}') {
403         char *key;
404         if (hts_json_token_type(t) != 's') {
405             ret = '?';
406             goto error;
407         }
408         key = hts_json_token_str(t);
409         if (!key) goto error;
410         if (strcmp(key, "access_token") == 0) {
411             if ((ret = hts_json_fnext(auth_fp, t, &str)) != 's') goto error;
412             token = ks_release(&str);
413         } else if (strcmp(key, "token_type") == 0) {
414             if ((ret = hts_json_fnext(auth_fp, t, &str)) != 's') goto error;
415             type = ks_release(&str);
416         } else if (strcmp(key, "expires_in") == 0) {
417             if ((ret = hts_json_fnext(auth_fp, t, &str)) != 'n') goto error;
418             expiry = ks_release(&str);
419         } else if (hts_json_fskip_value(auth_fp, '\0') != 'v') {
420             ret = '?';
421             goto error;
422         }
423     }
424 
425     if (!token || (type && strcmp(type, "Bearer") != 0)) {
426         ret = 'i';
427         goto error;
428     }
429 
430     ret = 'm';
431     str.l = 0;
432     if (kputs("Authorization: Bearer ", &str) < 0) goto error;
433     if (kputs(token, &str) < 0) goto error;
434     free(tok->token);
435     tok->token = ks_release(&str);
436     if (expiry) {
437         long exp = strtol(expiry, NULL, 10);
438         if (exp < 0) exp = 0;
439         tok->expiry = time(NULL) + exp;
440     } else {
441         tok->expiry = 0;
442     }
443     ret = 'v';
444 
445  error:
446     free(token);
447     free(type);
448     free(expiry);
449     free(str.s);
450     hts_json_free_token(t);
451     return ret;
452 }
453 
/*
 * Read a plain-text token: the first whitespace-delimited word on the first
 * line of auth_fp.  tok->token becomes "Authorization: Bearer <word>", or
 * NULL if the line holds no word, and tok->expiry is set to 0.
 *
 * Returns 0 on success, -1 if the line could not be read or memory ran out
 * (in which case tok is left untouched).
 */
static int read_auth_plain(auth_token *tok, hFILE *auth_fp) {
    kstring_t line = {0, 0, NULL};
    kstring_t token = {0, 0, NULL};
    const char *start, *end;
    int status = -1;

    if (kgetline(&line, (char * (*)(char *, int, void *)) hgets, auth_fp) < 0)
        goto done;
    if (kputc('\0', &line) < 0)
        goto done;

    // Skip leading white space, then find the end of the word
    start = line.s;
    while (*start && isspace_c(*start))
        start++;
    end = start;
    while (*end && !isspace_c(*end))
        end++;

    if (end > start) {
        if (kputs("Authorization: Bearer ", &token) < 0)
            goto done;
        if (kputsn(start, end - start, &token) < 0)
            goto done;
    }

    free(tok->token);
    tok->token = ks_release(&token); // Leaves token.s NULL
    tok->expiry = 0;
    status = 0;

 done:
    free(line.s);
    free(token.s);
    return status;
}
481 
/*
 * Re-read tok from tok->path if it is due to expire.
 *
 * A token is due when its expiry is non-zero and falls within
 * AUTH_REFRESH_EARLY_SECS of the current time.  The source is treated as
 * JSON if a '{' appears in its first few bytes, otherwise as plain text.
 *
 * *changed is set to 1 if a refresh was attempted, 0 otherwise.
 *
 * Returns 0 on success (which includes "still valid" and "source does not
 * exist", the latter leaving the token empty with refreshing disabled).
 * Returns -1 on failure; apart from a failing hclose(), failures also set
 * tok->failed so that later calls fail immediately.
 */
static int renew_auth_token(auth_token *tok, int *changed) {
    hFILE *auth_fp = NULL;
    char buffer[16];
    ssize_t len;

    *changed = 0;
    if (tok->expiry == 0 || time(NULL) + AUTH_REFRESH_EARLY_SECS < tok->expiry)
        return 0; // Still valid

    if (tok->failed)
        return -1;

    *changed = 1;
    auth_fp = hopen(tok->path, "rR");
    if (!auth_fp) {
        // Not worried about missing files; other errors are bad.
        if (errno != ENOENT)
            goto fail;

        tok->expiry = 0; // Prevent retry
        free(tok->token); // Just in case it was set
        tok->token = NULL; // Don't leave a dangling pointer for later users
        return 0;
    }

    len = hpeek(auth_fp, buffer, sizeof(buffer));
    if (len < 0)
        goto fail;

    if (memchr(buffer, '{', len) != NULL) {
        if (read_auth_json(tok, auth_fp) != 'v')
            goto fail;
    } else {
        if (read_auth_plain(tok, auth_fp) < 0)
            goto fail;
    }

    return hclose(auth_fp) < 0 ? -1 : 0;

 fail:
    tok->failed = 1;
    if (auth_fp) hclose_abruptly(auth_fp);
    return -1;
}
525 
/*
 * Make fp's extra header list reflect the current value of its auth token.
 *
 * Nothing is done if an Authorization header was supplied some other way
 * (auth_hdr_num < 0) or there is no token object.  Otherwise the token is
 * refreshed if necessary and then:
 *   - an existing token header is replaced by the new value, or removed
 *     if the token is now empty;
 *   - if there was no token header and a token exists, one is appended.
 * fp->headers.auth_hdr_num records the 1-based position of the header in
 * the extra list, or 0 if there is none.
 *
 * The caller is responsible for passing the (possibly changed) header list
 * to libcurl afterwards.
 *
 * Returns 0 on success, -1 on failure.
 */
static int add_auth_header(hFILE_libcurl *fp) {
    int changed = 0;

    if (fp->headers.auth_hdr_num < 0)
        return 0; // Have an Authorization header from open or header callback

    if (!fp->headers.auth)
        return 0; // Nothing to add

    pthread_mutex_lock(&fp->headers.auth->lock);
    if (renew_auth_token(fp->headers.auth, &changed) < 0)
        goto unlock_fail;

    if (!changed && fp->headers.auth_hdr_num > 0) {
        pthread_mutex_unlock(&fp->headers.auth->lock);
        return 0;
    }

    if (fp->headers.auth_hdr_num > 0) {
        // Had a previous header, so swap in the new one
        char *header = fp->headers.auth->token;
        char *header_copy = header ? strdup(header) : NULL;
        int idx = fp->headers.auth_hdr_num - 1;
        if (header && !header_copy)
            goto unlock_fail;

        if (header_copy) {
            free(fp->headers.extra.list[idx].data);
            fp->headers.extra.list[idx].data = header_copy;
        } else {
            unsigned int j;
            // More complicated case - need to get rid of the old header
            // and tidy up linked lists
            free(fp->headers.extra.list[idx].data);
            for (j = idx + 1; j < fp->headers.extra.num; j++) {
                fp->headers.extra.list[j - 1] = fp->headers.extra.list[j];
                fp->headers.extra.list[j - 1].next = &fp->headers.extra.list[j];
            }
            fp->headers.extra.num--;
            if (fp->headers.extra.num > 0) {
                fp->headers.extra.list[fp->headers.extra.num-1].next = NULL;
            } else if (fp->headers.fixed.num > 0) {
                fp->headers.fixed.list[fp->headers.fixed.num - 1].next = NULL;
            }
            fp->headers.auth_hdr_num = 0;
        }
    } else if (fp->headers.auth->token) {
        // Add new header and remember where it is
        int rc = append_header(&fp->headers.extra,
                               fp->headers.auth->token, 1);

        // append_header() may have moved the extra array, or just made an
        // empty extra list non-empty.  Either way the tail of the fixed
        // list has to be pointed at the current start of the extra list,
        // or the chain is broken (or worse, dangling).
        if (fp->headers.fixed.num > 0) {
            fp->headers.fixed.list[fp->headers.fixed.num - 1].next
                = (fp->headers.extra.num > 0
                   ? &fp->headers.extra.list[0] : NULL);
        }

        if (rc < 0)
            goto unlock_fail;
        fp->headers.auth_hdr_num = fp->headers.extra.num;
    }

    pthread_mutex_unlock(&fp->headers.auth->lock);
    return 0;

 unlock_fail:
    pthread_mutex_unlock(&fp->headers.auth->lock);
    return -1;
}
588 
/*
 * Find (or create) the auth_token that applies to url, attach it to fp and
 * add the resulting Authorization header.
 *
 * The token location is curl.auth_path with every "%h" replaced by the host
 * part of url.  Tokens are cached in curl.auth_map, keyed on that expanded
 * location, so handles for the same host share one token.
 *
 * Returns 0 on success or when there is nothing to do, -1 on failure.
 * NOTE: if the token object itself cannot be allocated, the request simply
 * proceeds without a token and 0 is returned.
 */
static int get_auth_token(hFILE_libcurl *fp, const char *url) {
    const char *host = NULL, *p, *q;
    kstring_t name = {0, 0, NULL};
    size_t host_len = 0;
    khiter_t idx;
    auth_token *tok = NULL;

    // Nothing to do if:
    //   curl.auth_path has not been set
    //   fp was made by hfile_libcurl (e.g. auth_path is a http:// url)
    //   we already have an Authorization header
    if (!curl.auth_path || fp->is_recursive || fp->headers.auth_hdr_num != 0)
        return 0;

    // Insist on having a secure connection unless the user insists harder
    if (!curl.allow_unencrypted_auth_header && strncmp(url, "https://", 8) != 0)
        return 0;

    host = strstr(url, "://");
    if (host) {
        host += 3;
        host_len = strcspn(host, "/");
    }

    // Expand each "%h" in the path template to the host name
    p = curl.auth_path;
    while ((q = strstr(p, "%h")) != NULL) {
        if (q - p > INT_MAX || host_len > INT_MAX) goto error;
        if (kputsn_(p, q - p, &name) < 0) goto error;
        // host is NULL (and host_len 0) when url has no "://"; don't hand
        // a NULL source pointer to the copy in that case
        if (host_len > 0 && kputsn_(host, host_len, &name) < 0) goto error;
        p = q + 2;
    }
    if (kputs(p, &name) < 0) goto error;

    pthread_mutex_lock(&curl.auth_lock);
    idx = kh_get(auth_map, curl.auth_map, name.s);
    if (idx < kh_end(curl.auth_map)) {
        tok = kh_value(curl.auth_map, idx);
    } else {
        tok = calloc(1, sizeof(*tok));
        if (tok && pthread_mutex_init(&tok->lock, NULL) != 0) {
            free(tok);
            tok = NULL;
        }
        if (tok) {
            int ret = -1;
            tok->path = ks_release(&name); // The map key; owned by tok
            tok->token = NULL;
            tok->expiry = 1; // Force refresh
            idx = kh_put(auth_map, curl.auth_map, tok->path, &ret);
            if (ret < 0) {
                // Insertion failed: idx is kh_end() here, which is not a
                // valid slot, so nothing may be stored through it
                free_auth(tok);
                tok = NULL;
            } else {
                kh_value(curl.auth_map, idx) = tok;
            }
        }
    }
    pthread_mutex_unlock(&curl.auth_lock);

    fp->headers.auth = tok;
    free(name.s);

    return add_auth_header(fp);

 error:
    free(name.s);
    return -1;
}
656 
// Drain the multi handle's message queue.  A CURLMSG_DONE message marks fp
// as finished and records the transfer's result code; any other message is
// ignored.
static void process_messages(hFILE_libcurl *fp)
{
    int remaining;

    for (;;) {
        CURLMsg *msg = curl_multi_info_read(fp->multi, &remaining);
        if (msg == NULL)
            break;

        if (msg->msg != CURLMSG_DONE)
            continue;

        fp->finished = 1;
        fp->final_result = msg->data.result;
    }
}
674 
/*
 * Wait until libcurl has something to do (unless it asked to be called
 * again straight away), then let it make progress with
 * curl_multi_perform().  If fewer transfers are running than fp expects,
 * the message queue is processed so that completion is noticed.
 *
 * Returns 0 on success, -1 on failure.  errno is set from the multi error
 * code when curl_multi_perform() fails, or by select() when that fails.
 */
static int wait_perform(hFILE_libcurl *fp)
{
    CURLMcode errm;
    int nrunning;

    if (!fp->perform_again) {
        fd_set rd, wr, ex;
        int maxfd;
        long timeout;

        FD_ZERO(&rd);
        FD_ZERO(&wr);
        FD_ZERO(&ex);

        if (curl_multi_fdset(fp->multi, &rd, &wr, &ex, &maxfd) != CURLM_OK) {
            maxfd = -1;
            timeout = 1000;
        } else if (curl_multi_timeout(fp->multi, &timeout) != CURLM_OK) {
            timeout = 1000;
        } else if (timeout < 0) {
            timeout = 10000;  // as recommended by curl_multi_timeout(3)
        }

        // With no descriptors to watch, only sleep briefly
        if (maxfd < 0 && timeout > 100)
            timeout = 100; // as recommended by curl_multi_fdset(3)

        if (timeout > 0) {
            struct timeval tval;
            tval.tv_sec  = (timeout / 1000);
            tval.tv_usec = (timeout % 1000) * 1000;

            if (select(maxfd + 1, &rd, &wr, &ex, &tval) < 0)
                return -1;
        }
    }

    errm = curl_multi_perform(fp->multi, &nrunning);
    fp->perform_again = (errm == CURLM_CALL_MULTI_PERFORM);
    if (errm != CURLM_OK && errm != CURLM_CALL_MULTI_PERFORM) {
        errno = multi_errno(errm);
        return -1;
    }

    if (nrunning < fp->nrunning)
        process_messages(fp);
    return 0;
}
715 
716 
recv_callback(char * ptr,size_t size,size_t nmemb,void * fpv)717 static size_t recv_callback(char *ptr, size_t size, size_t nmemb, void *fpv)
718 {
719     hFILE_libcurl *fp = (hFILE_libcurl *) fpv;
720     size_t n = size * nmemb;
721 
722     if (n > fp->buffer.len) { fp->paused = 1; return CURL_WRITEFUNC_PAUSE; }
723     else if (n == 0) return 0;
724 
725     memcpy(fp->buffer.ptr.rd, ptr, n);
726     fp->buffer.ptr.rd += n;
727     fp->buffer.len -= n;
728     return n;
729 }
730 
/*
 * hFILE read backend: read up to nbytes into bufferv.
 *
 * Any seek deferred by libcurl_seek() is carried out first.  A short
 * forward seek (less than MIN_SEEK_FORWARD bytes beyond the data already
 * received) is done by reading and discarding the unwanted bytes; anything
 * else restarts the transfer at the new position.
 *
 * Returns the number of bytes stored in bufferv (0 once the transfer has
 * finished with nothing more to deliver), or -1 with errno set on failure.
 */
static ssize_t libcurl_read(hFILE *fpv, void *bufferv, size_t nbytes)
{
    hFILE_libcurl *fp = (hFILE_libcurl *) fpv;
    char *buffer = (char *) bufferv;
    off_t to_skip = -1;     // Bytes still to discard; -1 when not skipping
    ssize_t got = 0;
    CURLcode err;

    if (fp->delayed_seek >= 0) {
        assert(fp->base.offset == fp->delayed_seek
               && fp->base.begin == fp->base.buffer
               && fp->base.end == fp->base.buffer);

        if (fp->last_offset >= 0
            && fp->delayed_seek > fp->last_offset
            && fp->delayed_seek - fp->last_offset < MIN_SEEK_FORWARD) {
            // If not seeking far, just read the data and throw it away.  This
            // is likely to be quicker than opening a new stream
            to_skip = fp->delayed_seek - fp->last_offset;
        } else {
            if (restart_from_position(fp, fp->delayed_seek) < 0) {
                return -1;
            }
        }
        fp->delayed_seek = -1;
        fp->last_offset = -1;
    }

    do {
        // Point the receive callback at the caller's buffer and let the
        // transfer run until the buffer is full or the transfer ends
        fp->buffer.ptr.rd = buffer;
        fp->buffer.len = nbytes;
        fp->paused = 0;
        err = curl_easy_pause(fp->easy, CURLPAUSE_CONT);
        if (err != CURLE_OK) { errno = easy_errno(fp->easy, err); return -1; }

        while (! fp->paused && ! fp->finished)
            if (wait_perform(fp) < 0) return -1;

        got = fp->buffer.ptr.rd - buffer;

        if (to_skip >= 0) { // Skipping over a small seek
            if (got <= to_skip) {
                // Everything received is unwanted.  This includes the case
                // where it is exactly the amount left to skip: to_skip
                // then drops to zero and the next chunk is returned whole.
                to_skip -= got;
                got = 0;    // None of this data belongs to the caller
            } else {
                // Enough was skipped, return the rest
                got -= to_skip;
                memmove(buffer, buffer + to_skip, got);
                to_skip = -1;
            }
        }
    } while (to_skip >= 0 && ! fp->finished);
    fp->buffer.ptr.rd = NULL;
    fp->buffer.len = 0;

    if (fp->finished && fp->final_result != CURLE_OK) {
        errno = easy_errno(fp->easy, fp->final_result);
        return -1;
    }

    return got;
}
793 
send_callback(char * ptr,size_t size,size_t nmemb,void * fpv)794 static size_t send_callback(char *ptr, size_t size, size_t nmemb, void *fpv)
795 {
796     hFILE_libcurl *fp = (hFILE_libcurl *) fpv;
797     size_t n = size * nmemb;
798 
799     if (fp->buffer.len == 0) {
800         // Send buffer is empty; normally pause, or signal EOF if we're closing
801         if (fp->closing) return 0;
802         else { fp->paused = 1; return CURL_READFUNC_PAUSE; }
803     }
804 
805     if (n > fp->buffer.len) n = fp->buffer.len;
806     memcpy(ptr, fp->buffer.ptr.wr, n);
807     fp->buffer.ptr.wr += n;
808     fp->buffer.len -= n;
809     return n;
810 }
811 
/*
 * hFILE write backend: offer nbytes from bufferv to the upload and run the
 * transfer until the send callback pauses (buffer used up) or the transfer
 * finishes.
 *
 * Returns the number of bytes the send callback took, or -1 with errno set
 * on failure.
 */
static ssize_t libcurl_write(hFILE *fpv, const void *bufferv, size_t nbytes)
{
    hFILE_libcurl *fp = (hFILE_libcurl *) fpv;
    const char *start = (const char *) bufferv;
    CURLcode err;

    // Point the send callback at the caller's data
    fp->buffer.ptr.wr = start;
    fp->buffer.len = nbytes;
    fp->paused = 0;

    err = curl_easy_pause(fp->easy, CURLPAUSE_CONT);
    if (err != CURLE_OK) {
        errno = easy_errno(fp->easy, err);
        return -1;
    }

    for (;;) {
        if (fp->paused || fp->finished)
            break;
        if (wait_perform(fp) < 0)
            return -1;
    }

    // However far the callback advanced is the amount written
    nbytes = fp->buffer.ptr.wr - start;
    fp->buffer.ptr.wr = NULL;
    fp->buffer.len = 0;

    if (fp->finished && fp->final_result != CURLE_OK) {
        errno = easy_errno(fp->easy, fp->final_result);
        return -1;
    }

    return nbytes;
}
838 
/*
 * hFILE seek backend.  Only handles opened for reading can seek, and only
 * SEEK_SET and SEEK_END (the latter when the file size is known) are
 * supported.
 *
 * The first seek restarts the transfer straight away, to find out whether
 * seeking works.  After one has succeeded, later seeks only record the
 * target; the work is done by the next read.
 *
 * Returns the new position, or -1 with errno set: ESPIPE if seeking is not
 * possible, ENOSYS for SEEK_CUR, EINVAL for a bad whence or a position
 * outside 0 <= pos <= file size.
 */
static off_t libcurl_seek(hFILE *fpv, off_t offset, int whence)
{
    hFILE_libcurl *fp = (hFILE_libcurl *) fpv;
    off_t origin, pos;
    int out_of_range;

    if (!(fp->is_read && fp->can_seek)) {
        // Cowardly refuse to seek when writing or a previous seek failed.
        errno = ESPIPE;
        return -1;
    }

    if (whence == SEEK_SET) {
        origin = 0;
    } else if (whence == SEEK_END) {
        if (fp->file_size < 0) {
            errno = ESPIPE;
            return -1;
        }
        origin = fp->file_size;
    } else {
        errno = (whence == SEEK_CUR) ? ENOSYS : EINVAL;
        return -1;
    }

    // Check 0 <= origin+offset <= fp->file_size carefully, avoiding overflow
    if (offset < 0)
        out_of_range = (origin + offset < 0);
    else
        out_of_range = (fp->file_size >= 0
                        && offset > fp->file_size - origin);
    if (out_of_range) {
        errno = EINVAL;
        return -1;
    }

    pos = origin + offset;

    if (!fp->tried_seek) {
        if (restart_from_position(fp, pos) < 0) {
            /* This value for errno may not be entirely true, but the caller
               may be able to carry on with the existing handle. */
            errno = ESPIPE;
            return -1;
        }

        fp->tried_seek = 1;
        return pos;
    }

    /* Seeking has worked at least once, so now we can delay doing
       the actual work until the next read.  This avoids lots of pointless
       http or ftp reconnections if the caller does lots of seeks
       without any intervening reads. */
    if (fp->delayed_seek < 0) {
        // Remember how far the stream had got before the first pending seek
        fp->last_offset = fp->base.offset + (fp->base.end - fp->base.buffer);
    }
    fp->delayed_seek = pos;
    return pos;
}
897 
/*
 * Start a new transfer for fp that begins at byte offset `pos`.
 *
 * The current easy handle is duplicated, the duplicate is told to resume
 * from `pos`, and it is driven until it either pauses or finishes.  Only if
 * that works is the original easy handle removed and replaced by the
 * duplicate.
 *
 * Returns 0 on success and -1 on failure.  On the failure paths that reach
 * the early_error label, fp->can_seek is cleared and errno is set when an
 * error code was recorded in save_errno.  The early returns taken while
 * refreshing headers, and the return after the final handle swap, do not
 * clear fp->can_seek.
 */
static int restart_from_position(hFILE_libcurl *fp, off_t pos) {
    hFILE_libcurl temp_fp;
    CURLcode err;
    CURLMcode errm;
    int update_headers = 0;
    int save_errno = 0;

    // TODO If we seem to be doing random access, use CURLOPT_RANGE to do
    // limited reads (e.g. about a BAM block!) so seeking can reuse the
    // existing connection more often.

    // Get new headers from the callback (if defined).  This changes the
    // headers in fp before it gets duplicated, but they should have been
    // sent by now.

    if (fp->headers.callback) {
        if (add_callback_headers(fp) != 0)
            return -1;
        update_headers = 1;
    }
    if (fp->headers.auth_hdr_num > 0 && fp->headers.auth) {
        if (add_auth_header(fp) != 0)
            return -1;
        update_headers = 1;
    }
    if (update_headers) {
        // Install the refreshed list on the existing handle so that the
        // duplicate made below starts out with it.
        struct curl_slist *list = get_header_list(fp);
        if (list) {
            err = curl_easy_setopt(fp->easy, CURLOPT_HTTPHEADER, list);
            if (err != CURLE_OK) {
                errno = easy_errno(fp->easy,err);
                return -1;
            }
        }
    }

    /*
      Duplicate the easy handle, and use CURLOPT_RESUME_FROM_LARGE to open
      a new request to the server, reading from the location that we want
      to seek to.  If the new request works and returns the correct data,
      the original easy handle in *fp is closed and replaced with the new
      one.  If not, we close the new handle and leave *fp unchanged.
     */

    // temp_fp is a by-value scratch copy of *fp living on this stack frame;
    // it shares fp->multi but gets its own easy handle and an empty buffer.
    memcpy(&temp_fp, fp, sizeof(temp_fp));
    temp_fp.buffer.len = 0;
    temp_fp.buffer.ptr.rd = NULL;
    temp_fp.easy = curl_easy_duphandle(fp->easy);
    if (!temp_fp.easy)
        goto early_error;

    // Point the duplicate's private and write-data pointers at the scratch
    // copy, so that state changes made while probing land in temp_fp.
    err = curl_easy_setopt(temp_fp.easy, CURLOPT_RESUME_FROM_LARGE,(curl_off_t)pos);
    err |= curl_easy_setopt(temp_fp.easy, CURLOPT_PRIVATE, &temp_fp);
    err |= curl_easy_setopt(temp_fp.easy, CURLOPT_WRITEDATA, &temp_fp);
    if (err != CURLE_OK) {
        save_errno = easy_errno(temp_fp.easy, err);
        goto error;
    }

    temp_fp.buffer.len = 0;  // Ensures we only read the response headers
    temp_fp.paused = temp_fp.finished = 0;

    // fp->multi and temp_fp.multi are the same.
    errm = curl_multi_add_handle(fp->multi, temp_fp.easy);
    if (errm != CURLM_OK) {
        save_errno = multi_errno(errm);
        goto error;
    }
    // Both structs count the handle that was just added.
    temp_fp.nrunning = ++fp->nrunning;

    err = curl_easy_pause(temp_fp.easy, CURLPAUSE_CONT);
    if (err != CURLE_OK) {
        save_errno = easy_errno(temp_fp.easy, err);
        goto error_remove;
    }

    // Drive the new transfer until it either pauses or finishes.
    while (! temp_fp.paused && ! temp_fp.finished)
        if (wait_perform(&temp_fp) < 0) {
            save_errno = errno;
            goto error_remove;
        }

    if (temp_fp.finished && temp_fp.final_result != CURLE_OK) {
        save_errno = easy_errno(temp_fp.easy, temp_fp.final_result);
        goto error_remove;
    }

    // We've got a good response, close the original connection and
    // replace it with the new one.

    errm = curl_multi_remove_handle(fp->multi, fp->easy);
    if (errm != CURLM_OK) {
        // Clean up as much as possible
        // (the reset also drops the new handle's pointers to temp_fp)
        curl_easy_reset(temp_fp.easy);
        if (curl_multi_remove_handle(fp->multi, temp_fp.easy) == CURLM_OK) {
            fp->nrunning--;
            curl_easy_cleanup(temp_fp.easy);
        }
        save_errno = multi_errno(errm);
        goto early_error;
    }
    fp->nrunning--;

    // Adopt the new handle and re-point its private/write-data pointers
    // from the on-stack temp_fp to the long-lived fp.
    curl_easy_cleanup(fp->easy);
    fp->easy = temp_fp.easy;
    err = curl_easy_setopt(fp->easy, CURLOPT_WRITEDATA, fp);
    err |= curl_easy_setopt(fp->easy, CURLOPT_PRIVATE, fp);
    if (err != CURLE_OK) {
        save_errno = easy_errno(fp->easy, err);
        curl_easy_reset(fp->easy);
        errno = save_errno;
        return -1;
    }
    // Carry the transfer state observed while probing over to fp.
    fp->buffer.len = 0;
    fp->paused = temp_fp.paused;
    fp->finished = temp_fp.finished;
    fp->perform_again = temp_fp.perform_again;
    fp->final_result = temp_fp.final_result;

    return 0;

    // Cleanup ladder: each label undoes one more step and falls through
    // to the next.
 error_remove:
    curl_easy_reset(temp_fp.easy); // Ensure no pointers to on-stack temp_fp
    errm = curl_multi_remove_handle(fp->multi, temp_fp.easy);
    if (errm != CURLM_OK) {
        // The handle could not be detached from the multi handle, so it is
        // not cleaned up here.
        errno = multi_errno(errm);
        return -1;
    }
    fp->nrunning--;
 error:
    curl_easy_cleanup(temp_fp.easy);
 early_error:
    fp->can_seek = 0;  // Don't try to seek again
    if (save_errno)
        errno = save_errno;
    return -1;
}
1035 
/*
 * Close method of the libcurl backend.
 *
 * Unpauses the transfer and performs on it until it pauses again or
 * finishes, then detaches and destroys the easy and multi handles and
 * releases the header lists.  The header callback, if one is set, is
 * invoked with a NULL header argument so it can free its own data.
 *
 * Returns 0 on success; otherwise -1 with errno set to the error recorded
 * during shutdown.  Teardown is always carried out, whatever went wrong.
 */
static int libcurl_close(hFILE *fpv)
{
    hFILE_libcurl *fp = (hFILE_libcurl *) fpv;
    int pending_errno = 0;
    CURLcode easy_rc;
    CURLMcode multi_rc;

    // Before closing the file, unpause it and perform on it so that uploads
    // have the opportunity to signal EOF to the server -- see send_callback().
    fp->buffer.len = 0;
    fp->closing = 1;
    fp->paused = 0;

    easy_rc = curl_easy_pause(fp->easy, CURLPAUSE_CONT);
    if (easy_rc != CURLE_OK) {
        pending_errno = easy_errno(fp->easy, easy_rc);
    }

    while (pending_errno == 0 && !fp->paused && !fp->finished) {
        if (wait_perform(fp) < 0) {
            pending_errno = errno;
        }
    }

    // A failed final result takes precedence over anything recorded above.
    if (fp->finished && fp->final_result != CURLE_OK) {
        pending_errno = easy_errno(fp->easy, fp->final_result);
    }

    // A detach failure is only reported if nothing else has gone wrong.
    multi_rc = curl_multi_remove_handle(fp->multi, fp->easy);
    if (pending_errno == 0 && multi_rc != CURLM_OK) {
        pending_errno = multi_errno(multi_rc);
    }
    fp->nrunning--;

    curl_easy_cleanup(fp->easy);
    curl_multi_cleanup(fp->multi);

    // Tell callback to free any data it needs to
    if (fp->headers.callback) {
        fp->headers.callback(fp->headers.callback_data, NULL);
    }
    free_headers(&fp->headers.fixed, 1);
    free_headers(&fp->headers.extra, 1);

    if (pending_errno == 0) {
        return 0;
    }
    errno = pending_errno;
    return -1;
}
1073 
// Method table installed by libcurl_open() on every handle it returns.
// The entries are positional: read, write and seek methods, one unused
// (NULL) slot, then the close method.
// NOTE(review): the NULL slot is presumably the flush method -- confirm
// against the definition of struct hFILE_backend.
static const struct hFILE_backend libcurl_backend =
{
    libcurl_read, libcurl_write, libcurl_seek, NULL, libcurl_close
};
1078 
1079 static hFILE *
libcurl_open(const char * url,const char * modes,http_headers * headers)1080 libcurl_open(const char *url, const char *modes, http_headers *headers)
1081 {
1082     hFILE_libcurl *fp;
1083     struct curl_slist *list;
1084     char mode;
1085     const char *s;
1086     CURLcode err;
1087     CURLMcode errm;
1088     int save, is_recursive;
1089 
1090     is_recursive = strchr(modes, 'R') != NULL;
1091 
1092     if ((s = strpbrk(modes, "rwa+")) != NULL) {
1093         mode = *s;
1094         if (strpbrk(&s[1], "rwa+")) mode = 'e';
1095     }
1096     else mode = '\0';
1097 
1098     if (mode != 'r' && mode != 'w') { errno = EINVAL; goto early_error; }
1099 
1100     fp = (hFILE_libcurl *) hfile_init(sizeof (hFILE_libcurl), modes, 0);
1101     if (fp == NULL) goto early_error;
1102 
1103     if (headers) {
1104         fp->headers = *headers;
1105     } else {
1106         memset(&fp->headers, 0, sizeof(fp->headers));
1107     }
1108 
1109     fp->file_size = -1;
1110     fp->buffer.ptr.rd = NULL;
1111     fp->buffer.len = 0;
1112     fp->final_result = (CURLcode) -1;
1113     fp->paused = fp->closing = fp->finished = fp->perform_again = 0;
1114     fp->can_seek = 1;
1115     fp->tried_seek = 0;
1116     fp->delayed_seek = fp->last_offset = -1;
1117     fp->is_recursive = is_recursive;
1118     fp->nrunning = 0;
1119     fp->easy = NULL;
1120 
1121     fp->multi = curl_multi_init();
1122     if (fp->multi == NULL) { errno = ENOMEM; goto error; }
1123 
1124     fp->easy = curl_easy_init();
1125     if (fp->easy == NULL) { errno = ENOMEM; goto error; }
1126 
1127     // Make a route to the hFILE_libcurl* given just a CURL* easy handle
1128     err = curl_easy_setopt(fp->easy, CURLOPT_PRIVATE, fp);
1129 
1130     // Avoid many repeated CWD calls with FTP, instead requesting the filename
1131     // by full path (as done in knet, but not strictly compliant with RFC1738).
1132     err |= curl_easy_setopt(fp->easy, CURLOPT_FTP_FILEMETHOD, CURLFTPMETHOD_NOCWD);
1133 
1134     if (mode == 'r') {
1135         err |= curl_easy_setopt(fp->easy, CURLOPT_WRITEFUNCTION, recv_callback);
1136         err |= curl_easy_setopt(fp->easy, CURLOPT_WRITEDATA, fp);
1137         fp->is_read = 1;
1138     }
1139     else {
1140         err |= curl_easy_setopt(fp->easy, CURLOPT_READFUNCTION, send_callback);
1141         err |= curl_easy_setopt(fp->easy, CURLOPT_READDATA, fp);
1142         err |= curl_easy_setopt(fp->easy, CURLOPT_UPLOAD, 1L);
1143         if (append_header(&fp->headers.fixed,
1144                           "Transfer-Encoding: chunked", 1) < 0)
1145             goto error;
1146         fp->is_read = 0;
1147     }
1148 
1149     err |= curl_easy_setopt(fp->easy, CURLOPT_SHARE, curl.share);
1150     err |= curl_easy_setopt(fp->easy, CURLOPT_URL, url);
1151     {
1152         char* env_curl_ca_bundle = getenv("CURL_CA_BUNDLE");
1153         if (env_curl_ca_bundle) {
1154             err |= curl_easy_setopt(fp->easy, CURLOPT_CAINFO, env_curl_ca_bundle);
1155         }
1156     }
1157     err |= curl_easy_setopt(fp->easy, CURLOPT_USERAGENT, curl.useragent.s);
1158     if (fp->headers.callback) {
1159         if (add_callback_headers(fp) != 0) goto error;
1160     }
1161     if (get_auth_token(fp, url) < 0)
1162         goto error;
1163     if ((list = get_header_list(fp)) != NULL)
1164         err |= curl_easy_setopt(fp->easy, CURLOPT_HTTPHEADER, list);
1165     err |= curl_easy_setopt(fp->easy, CURLOPT_FOLLOWLOCATION, 1L);
1166     if (hts_verbose <= 8)
1167         err |= curl_easy_setopt(fp->easy, CURLOPT_FAILONERROR, 1L);
1168     if (hts_verbose >= 8)
1169         err |= curl_easy_setopt(fp->easy, CURLOPT_VERBOSE, 1L);
1170 
1171     if (err != 0) { errno = ENOSYS; goto error; }
1172 
1173     errm = curl_multi_add_handle(fp->multi, fp->easy);
1174     if (errm != CURLM_OK) { errno = multi_errno(errm); goto error; }
1175     fp->nrunning++;
1176 
1177     while (! fp->paused && ! fp->finished)
1178         if (wait_perform(fp) < 0) goto error_remove;
1179 
1180     if (fp->finished && fp->final_result != CURLE_OK) {
1181         errno = easy_errno(fp->easy, fp->final_result);
1182         goto error_remove;
1183     }
1184 
1185     if (mode == 'r') {
1186         double dval;
1187         if (curl_easy_getinfo(fp->easy, CURLINFO_CONTENT_LENGTH_DOWNLOAD,
1188                               &dval) == CURLE_OK && dval >= 0.0)
1189             fp->file_size = (off_t) (dval + 0.1);
1190     }
1191 
1192     fp->base.backend = &libcurl_backend;
1193     return &fp->base;
1194 
1195 error_remove:
1196     save = errno;
1197     (void) curl_multi_remove_handle(fp->multi, fp->easy);
1198     fp->nrunning--;
1199     errno = save;
1200 
1201 error:
1202     save = errno;
1203     if (fp->easy) curl_easy_cleanup(fp->easy);
1204     if (fp->multi) curl_multi_cleanup(fp->multi);
1205     free_headers(&fp->headers.extra, 1);
1206     hfile_destroy((hFILE *) fp);
1207     errno = save;
1208     return NULL;
1209 
1210 early_error:
1211     return NULL;
1212 }
1213 
hopen_libcurl(const char * url,const char * modes)1214 static hFILE *hopen_libcurl(const char *url, const char *modes)
1215 {
1216     return libcurl_open(url, modes, NULL);
1217 }
1218 
/*
 * Append a copy of one caller-supplied header line to the fixed list.  If
 * is_authorization() accepts the line, headers->auth_hdr_num is set to -1.
 * Returns 0 on success, -1 if the header could not be appended.
 */
static int parse_va_list_add_header(http_headers *headers, const char *hdr)
{
    if (append_header(&headers->fixed, hdr, 1) < 0)
        return -1;
    if (is_authorization(hdr))
        headers->auth_hdr_num = -1;
    return 0;
}

/*
 * Decode the extra hopen() arguments in `args` into `headers`.
 *
 * The list is a sequence of (const char *) option names, each followed by
 * its value(s), and is terminated by a NULL option name.  Recognised names:
 *
 *   "httphdr"               one header string
 *   "httphdr:l"             header strings up to a NULL
 *   "httphdr:v"             a NULL-terminated array of header strings
 *   "httphdr_callback"      header callback function
 *   "httphdr_callback_data" pointer handed to the callback
 *   "va_list"               pointer to a nested va_list, parsed recursively
 *   "auth_token_enabled"    the string "false" sets auth_hdr_num to -3
 *                           (presumably disabling auth tokens -- confirm
 *                           where auth_hdr_num is consumed)
 *
 * A NULL value is ignored for "httphdr", "httphdr:v", "va_list" and
 * "auth_token_enabled".
 *
 * Returns 0 on success.  Returns -1 if a header cannot be appended, or
 * with errno = EINVAL if an option name is not recognised.  Headers
 * already appended to headers->fixed are left in place for the caller to
 * free.
 */
static int parse_va_list(http_headers *headers, va_list args)
{
    const char *argtype;

    while ((argtype = va_arg(args, const char *)) != NULL) {
        if (strcmp(argtype, "httphdr:v") == 0) {
            const char **hdr = va_arg(args, const char **);
            // A NULL array pointer is treated as an empty array.
            for (; hdr != NULL && *hdr != NULL; hdr++) {
                if (parse_va_list_add_header(headers, *hdr) < 0)
                    return -1;
            }
        }
        else if (strcmp(argtype, "httphdr:l") == 0) {
            const char *hdr;
            while ((hdr = va_arg(args, const char *)) != NULL) {
                if (parse_va_list_add_header(headers, hdr) < 0)
                    return -1;
            }
        }
        else if (strcmp(argtype, "httphdr") == 0) {
            const char *hdr = va_arg(args, const char *);
            if (hdr) {
                if (parse_va_list_add_header(headers, hdr) < 0)
                    return -1;
            }
        }
        else if (strcmp(argtype, "httphdr_callback") == 0) {
            headers->callback = va_arg(args, const hts_httphdr_callback);
        }
        else if (strcmp(argtype, "httphdr_callback_data") == 0) {
            headers->callback_data = va_arg(args, void *);
        }
        else if (strcmp(argtype, "va_list") == 0) {
            va_list *args2 = va_arg(args, va_list *);
            if (args2) {
                if (parse_va_list(headers, *args2) < 0) return -1;
            }
        }
        else if (strcmp(argtype, "auth_token_enabled") == 0) {
            const char *flag = va_arg(args, const char *);
            // Guard against NULL before strcmp(); a NULL flag changes
            // nothing, as for a NULL "httphdr" value.
            if (flag != NULL && strcmp(flag, "false") == 0)
                headers->auth_hdr_num = -3;
        }
        else { errno = EINVAL; return -1; }
    }

    return 0;
}
1272 
1273 /*
1274   HTTP headers to be added to the request can be passed in as extra
1275   arguments to hopen().  The headers can be specified as follows:
1276 
1277   * Single header:
1278     hopen(url, mode, "httphdr", "X-Hdr-1: text", NULL);
1279 
1280   * Multiple headers in the argument list:
1281     hopen(url, mode, "httphdr:l", "X-Hdr-1: text", "X-Hdr-2: text", NULL, NULL);
1282 
1283   * Multiple headers in a char* array:
1284     hopen(url, mode, "httphdr:v", hdrs, NULL);
1285     where `hdrs` is a char **.  The list ends with a NULL pointer.
1286 
1287   * A callback function
1288     hopen(url, mode, "httphdr_callback", func,
1289                      "httphdr_callback_data", arg, NULL);
1290     `func` has type
1291          int (* hts_httphdr_callback) (void *cb_data, char ***hdrs);
1292     `arg` is passed to the callback as a void *.
1293 
1294     The function is called at file open, and when attempting to seek (which
1295     opens a new HTTP request).  This allows, for example, access tokens
1296     that may have gone stale to be regenerated.  The function is also
1297     called (with `hdrs` == NULL) on file close so that the callback can
1298     free any memory that it needs to.
1299 
1300     The callback should return 0 on success, non-zero on failure.  It should
1301     return in *hdrs a list of strings containing the new headers (terminated
1302     with a NULL pointer).  These will replace any headers previously supplied
1303     by the callback.  If no changes are necessary, it can return NULL
1304     in *hdrs, in which case the previous headers will be left unchanged.
1305 
1306     Ownership of the strings in the header list passes to hfile_libcurl,
1307     so the callback should not attempt to use or free them itself.  The memory
1308     containing the array belongs to the callback and will not be freed by
1309     hfile_libcurl.
1310 
1311     Headers supplied by the callback are appended after any specified
1312     using the "httphdr", "httphdr:l" or "httphdr:v" methods.  No attempt
1313     is made to replace these headers (even if a key is repeated) so anything
1314     that is expected to vary needs to come from the callback.
1315  */
1316 
vhopen_libcurl(const char * url,const char * modes,va_list args)1317 static hFILE *vhopen_libcurl(const char *url, const char *modes, va_list args)
1318 {
1319     hFILE *fp = NULL;
1320     http_headers headers = { { NULL, 0, 0 }, { NULL, 0, 0 }, NULL, NULL };
1321     if (parse_va_list(&headers, args) == 0) {
1322         fp = libcurl_open(url, modes, &headers);
1323     }
1324 
1325     if (!fp) {
1326         free_headers(&headers.fixed, 1);
1327     }
1328     return fp;
1329 }
1330 
PLUGIN_GLOBAL(hfile_plugin_init,_libcurl)1331 int PLUGIN_GLOBAL(hfile_plugin_init,_libcurl)(struct hFILE_plugin *self)
1332 {
1333     static const struct hFILE_scheme_handler handler =
1334         { hopen_libcurl, hfile_always_remote, "libcurl",
1335           2000 + 50,
1336           vhopen_libcurl };
1337 
1338 #ifdef ENABLE_PLUGINS
1339     // Embed version string for examination via strings(1) or what(1)
1340     static const char id[] = "@(#)hfile_libcurl plugin (htslib)\t" HTS_VERSION;
1341     const char *version = strchr(id, '\t')+1;
1342 #else
1343     const char *version = hts_version();
1344 #endif
1345     const curl_version_info_data *info;
1346     const char * const *protocol;
1347     const char *auth;
1348     CURLcode err;
1349     CURLSHcode errsh;
1350 
1351     err = curl_global_init(CURL_GLOBAL_ALL);
1352     if (err != CURLE_OK) { errno = easy_errno(NULL, err); return -1; }
1353 
1354     curl.share = curl_share_init();
1355     if (curl.share == NULL) { curl_global_cleanup(); errno = EIO; return -1; }
1356     errsh = curl_share_setopt(curl.share, CURLSHOPT_LOCKFUNC, share_lock);
1357     errsh |= curl_share_setopt(curl.share, CURLSHOPT_UNLOCKFUNC, share_unlock);
1358     errsh |= curl_share_setopt(curl.share, CURLSHOPT_SHARE, CURL_LOCK_DATA_DNS);
1359     if (errsh != 0) {
1360         curl_share_cleanup(curl.share);
1361         curl_global_cleanup();
1362         errno = EIO;
1363         return -1;
1364     }
1365 
1366     if ((auth = getenv("HTS_AUTH_LOCATION")) != NULL) {
1367         curl.auth_path = strdup(auth);
1368         curl.auth_map = kh_init(auth_map);
1369         if (!curl.auth_path || !curl.auth_map) {
1370             int save_errno = errno;
1371             free(curl.auth_path);
1372             kh_destroy(auth_map, curl.auth_map);
1373             curl_share_cleanup(curl.share);
1374             curl_global_cleanup();
1375             errno = save_errno;
1376             return -1;
1377         }
1378     }
1379     if ((auth = getenv("HTS_ALLOW_UNENCRYPTED_AUTHORIZATION_HEADER")) != NULL
1380         && strcmp(auth, "I understand the risks") == 0) {
1381         curl.allow_unencrypted_auth_header = 1;
1382     }
1383 
1384     info = curl_version_info(CURLVERSION_NOW);
1385     ksprintf(&curl.useragent, "htslib/%s libcurl/%s", version, info->version);
1386 
1387     self->name = "libcurl";
1388     self->destroy = libcurl_exit;
1389 
1390     for (protocol = info->protocols; *protocol; protocol++)
1391         hfile_add_scheme_handler(*protocol, &handler);
1392     return 0;
1393 }
1394