1 /*  hfile_libcurl.c -- libcurl backend for low-level file streams.
2 
3     Copyright (C) 2015-2017, 2019-2020 Genome Research Ltd.
4 
5     Author: John Marshall <jm18@sanger.ac.uk>
6 
7 Permission is hereby granted, free of charge, to any person obtaining a copy
8 of this software and associated documentation files (the "Software"), to deal
9 in the Software without restriction, including without limitation the rights
10 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 copies of the Software, and to permit persons to whom the Software is
12 furnished to do so, subject to the following conditions:
13 
14 The above copyright notice and this permission notice shall be included in
15 all copies or substantial portions of the Software.
16 
17 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 DEALINGS IN THE SOFTWARE.  */
24 
25 #define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h
26 #include <config.h>
27 
28 #include <stdarg.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <strings.h>
32 #include <errno.h>
33 #include <pthread.h>
34 #ifndef _WIN32
35 # include <sys/select.h>
36 #endif
37 #include <assert.h>
38 
39 #include "hfile_internal.h"
40 #ifdef ENABLE_PLUGINS
41 #include "version.h"
42 #endif
43 #include "htslib/hts.h"  // for hts_version() and hts_verbose
44 #include "htslib/kstring.h"
45 #include "htslib/khash.h"
46 
47 #include <curl/curl.h>
48 
// Safety margin, in seconds, applied when checking an auth token's expiry
// time, so that the token is renewed before clock skew or a slow server
// can make it stale.
#define AUTH_REFRESH_EARLY_SECS 60

// Forward seeks shorter than this many bytes are handled by reading the
// intervening data and discarding it, instead of restarting the transfer.
// The best value is a trade-off between how long a new connection takes
// to set up and how quickly data arrives.
#define MIN_SEEK_FORWARD 1000000
58 
59 typedef struct {
60     char *path;
61     char *token;
62     time_t expiry;
63     int failed;
64     pthread_mutex_t lock;
65 } auth_token;
66 
67 // For the authorization header cache
68 KHASH_MAP_INIT_STR(auth_map, auth_token *)
69 
// Header list in the form libcurl expects.  The nodes live in one growable
// array and are chained together through their next pointers.
typedef struct {
    struct curl_slist *list;  // Array of list nodes
    unsigned int num;         // Number of nodes currently in use
    unsigned int size;        // Number of nodes allocated
} hdrlist;
76 
77 typedef struct {
78     hdrlist fixed;                   // List of headers supplied at hopen()
79     hdrlist extra;                   // List of headers from callback
80     hts_httphdr_callback callback;   // Callback to get more headers
81     void *callback_data;             // Data to pass to httphdr callback
82     auth_token *auth;                // Authentication token
83     int auth_hdr_num;                // Location of auth_token in hdrlist extra
84                                      // If -1, Authorization header is in fixed
85                                      //    -2, it came from the callback
86                                      //    -3, "auth_token_enabled", "false"
87                                      //        passed to hopen()
88     redirect_callback redirect;      // Callback to handle 3xx redirects
89     void *redirect_data;             // Data to pass to redirect_callback
90     long *http_response_ptr;         // Location to store http response code.
91     int fail_on_error;               // Open fails on >400 response code
92                                      //    (default true)
93 } http_headers;
94 
95 typedef struct {
96     hFILE base;
97     CURL *easy;
98     CURLM *multi;
99     off_t file_size;
100     struct {
101         union { char *rd; const char *wr; } ptr;
102         size_t len;
103     } buffer;
104     CURLcode final_result;  // easy result code for finished transfers
105     // Flags for communicating with libcurl callbacks:
106     unsigned paused : 1;    // callback tells us that it has paused transfer
107     unsigned closing : 1;   // informs callback that hclose() has been invoked
108     unsigned finished : 1;  // wait_perform() tells us transfer is complete
109     unsigned perform_again : 1;
110     unsigned is_read : 1;   // Opened in read mode
111     unsigned can_seek : 1;  // Can (attempt to) seek on this handle
112     unsigned is_recursive:1; // Opened by hfile_libcurl itself
113     unsigned tried_seek : 1; // At least one seek has been attempted
114     int nrunning;
115     http_headers headers;
116 
117     off_t delayed_seek;      // Location to seek to before reading
118     off_t last_offset;       // Location we're seeking from
119     char *preserved;         // Preserved buffer content on seek
120     size_t preserved_bytes;  // Number of preserved bytes
121     size_t preserved_size;   // Size of preserved buffer
122 } hFILE_libcurl;
123 
124 static off_t libcurl_seek(hFILE *fpv, off_t offset, int whence);
125 static int restart_from_position(hFILE_libcurl *fp, off_t pos);
126 
/*
 * Map an HTTP response status onto an errno value.
 * Statuses below 400 are not errors and give 0.  Unrecognised 4xx statuses
 * give EINVAL; unrecognised statuses of 500 and above give EIO.
 */
static int http_status_errno(int status)
{
    if (status < 400)
        return 0;

    switch (status) {
    case 401:
    case 407:
        return EPERM;
    case 403:
        return EACCES;
    case 404:
    case 410:
        return ENOENT;
    case 405:
        return EROFS;
    case 408:
    case 504:
        return ETIMEDOUT;
    case 501:
        return ENOSYS;
    case 503:
        return EBUSY;
    default:
        return (status >= 500) ? EIO : EINVAL;
    }
}
149 
/*
 * Map a libcurl easy-interface result code onto an errno value.
 *
 * easy is the handle the result came from; it is queried for the OS-level
 * errno or the HTTP response code where those give a better answer.
 * Returns 0 for CURLE_OK.  Codes with no specific mapping are logged and
 * reported as EIO.
 */
static int easy_errno(CURL *easy, CURLcode err)
{
    long lval;

    switch (err) {
    case CURLE_OK:
        return 0;

    case CURLE_UNSUPPORTED_PROTOCOL:
    case CURLE_URL_MALFORMAT:
        return EINVAL;

#if LIBCURL_VERSION_NUM >= 0x071505
    case CURLE_NOT_BUILT_IN:
        return ENOSYS;
#endif

    case CURLE_COULDNT_RESOLVE_PROXY:
    case CURLE_COULDNT_RESOLVE_HOST:
    case CURLE_FTP_CANT_GET_HOST:
        return EDESTADDRREQ; // Lookup failure

    case CURLE_COULDNT_CONNECT:
    case CURLE_SEND_ERROR:
    case CURLE_RECV_ERROR:
        // Prefer the OS errno recorded by libcurl, but never hand back 0
        // ("no error") for something that is definitely a failure.
        if (curl_easy_getinfo(easy, CURLINFO_OS_ERRNO, &lval) == CURLE_OK
            && lval != 0)
            return (int) lval;
        return ECONNABORTED;

    case CURLE_REMOTE_ACCESS_DENIED:
    case CURLE_LOGIN_DENIED:
    case CURLE_TFTP_PERM:
        return EACCES;

    case CURLE_PARTIAL_FILE:
        return EPIPE;

    case CURLE_HTTP_RETURNED_ERROR:
        if (curl_easy_getinfo(easy, CURLINFO_RESPONSE_CODE, &lval) == CURLE_OK)
            return http_status_errno(lval);
        else
            return EIO;

    case CURLE_OUT_OF_MEMORY:
        return ENOMEM;

    case CURLE_OPERATION_TIMEDOUT:
        return ETIMEDOUT;

    case CURLE_RANGE_ERROR:
        return ESPIPE;

    case CURLE_SSL_CONNECT_ERROR:
        // TODO return SSL error buffer messages
        return ECONNABORTED;

    case CURLE_FILE_COULDNT_READ_FILE:
    case CURLE_TFTP_NOTFOUND:
        return ENOENT;

    case CURLE_TOO_MANY_REDIRECTS:
        return ELOOP;

    case CURLE_FILESIZE_EXCEEDED:
        return EFBIG;

    case CURLE_REMOTE_DISK_FULL:
        return ENOSPC;

    case CURLE_REMOTE_FILE_EXISTS:
        return EEXIST;

    default:
        hts_log_error("Libcurl reported error %d (%s)", (int) err,
                      curl_easy_strerror(err));
        return EIO;
    }
}
229 
/*
 * Map a libcurl multi-interface result code onto an errno value.
 * CURLM_OK and CURLM_CALL_MULTI_PERFORM are not errors and give 0.
 * Codes with no specific mapping are logged and reported as EIO.
 */
static int multi_errno(CURLMcode errm)
{
    if (errm == CURLM_OK || errm == CURLM_CALL_MULTI_PERFORM)
        return 0;

    if (errm == CURLM_OUT_OF_MEMORY)
        return ENOMEM;

    if (errm == CURLM_BAD_HANDLE
        || errm == CURLM_BAD_EASY_HANDLE
        || errm == CURLM_BAD_SOCKET)
        return EBADF;

    hts_log_error("Libcurl reported error %d (%s)", (int) errm,
                  curl_multi_strerror(errm));
    return EIO;
}
251 
252 static struct {
253     kstring_t useragent;
254     CURLSH *share;
255     char *auth_path;
256     khash_t(auth_map) *auth_map;
257     int allow_unencrypted_auth_header;
258     pthread_mutex_t auth_lock;
259     pthread_mutex_t share_lock;
260 } curl = { { 0, 0, NULL }, NULL, NULL, NULL, 0, PTHREAD_MUTEX_INITIALIZER,
261            PTHREAD_MUTEX_INITIALIZER };
262 
// Lock callback for the libcurl share handle.  One mutex covers all kinds
// of shared data, so every argument is ignored.
static void share_lock(CURL *handle, curl_lock_data data,
                       curl_lock_access access, void *userptr) {
    (void) handle;
    (void) data;
    (void) access;
    (void) userptr;
    pthread_mutex_lock(&curl.share_lock);
}
267 
// Unlock callback for the libcurl share handle; releases the mutex taken
// by share_lock().  Every argument is ignored.
static void share_unlock(CURL *handle, curl_lock_data data, void *userptr) {
    (void) handle;
    (void) data;
    (void) userptr;
    pthread_mutex_unlock(&curl.share_lock);
}
271 
// Destroy an auth_token together with the strings it owns.  A NULL tok is
// ignored.  Aborts the program if the token's mutex cannot be destroyed.
static void free_auth(auth_token *tok) {
    if (tok == NULL)
        return;

    if (pthread_mutex_destroy(&tok->lock) != 0)
        abort();

    free(tok->token);
    free(tok->path);
    free(tok);
}
279 
libcurl_exit()280 static void libcurl_exit()
281 {
282     if (curl_share_cleanup(curl.share) == CURLSHE_OK)
283         curl.share = NULL;
284 
285     free(curl.useragent.s);
286     curl.useragent.l = curl.useragent.m = 0; curl.useragent.s = NULL;
287 
288     free(curl.auth_path);
289     curl.auth_path = NULL;
290 
291     if (curl.auth_map) {
292         khiter_t i;
293         for (i = kh_begin(curl.auth_map); i != kh_end(curl.auth_map); ++i) {
294             if (kh_exist(curl.auth_map, i)) {
295                 free_auth(kh_value(curl.auth_map, i));
296                 kh_key(curl.auth_map, i) = NULL;
297                 kh_value(curl.auth_map, i) = NULL;
298             }
299         }
300         kh_destroy(auth_map, curl.auth_map);
301         curl.auth_map = NULL;
302     }
303 
304     curl_global_cleanup();
305 }
306 
/*
 * Add one header line to the end of hdrs.
 * If dup is non-zero the list stores its own copy of data; otherwise it
 * stores the data pointer itself.
 * Returns 0 on success, -1 if memory ran out (or data is NULL when dup is
 * zero), in which case the number of headers in the list is unchanged.
 * The node array may move when it grows; pointers into it that are held
 * elsewhere are not updated here.
 */
static int append_header(hdrlist *hdrs, const char *data, int dup) {
    struct curl_slist *node;

    if (hdrs->num == hdrs->size) {
        // Array is full, or not allocated yet: double it, starting from 4
        unsigned int grown = hdrs->size > 0 ? 2 * hdrs->size : 4;
        unsigned int k;
        struct curl_slist *moved = realloc(hdrs->list,
                                           grown * sizeof(*moved));
        if (moved == NULL)
            return -1;
        hdrs->list = moved;
        hdrs->size = grown;
        // The array may have moved, so rebuild the chain of next pointers
        for (k = 0; k + 1 < hdrs->num; k++)
            moved[k].next = &moved[k + 1];
    }

    node = &hdrs->list[hdrs->num];
    // Annoyingly, libcurl doesn't declare the char * as const...
    node->data = dup ? strdup(data) : (char *) data;
    if (node->data == NULL)
        return -1;

    node->next = NULL;
    if (hdrs->num > 0)
        node[-1].next = node;
    hdrs->num++;
    return 0;
}
325 
/*
 * Free every header string held in hdrs and mark the list as empty.
 * If completely is non-zero the node array is released as well; otherwise
 * it is kept so that it can be refilled.
 */
static void free_headers(hdrlist *hdrs, int completely) {
    struct curl_slist *node = hdrs->list;
    unsigned int left;

    for (left = hdrs->num; left > 0; left--, node++) {
        free(node->data);
        node->data = NULL;
        node->next = NULL;
    }
    hdrs->num = 0;

    if (!completely)
        return;

    free(hdrs->list);
    hdrs->list = NULL;
    hdrs->size = 0;
}
340 
// Return the first node of the request header list: the head of the fixed
// headers if there are any, otherwise the head of the extra headers, or
// NULL when both lists are empty.
static struct curl_slist * get_header_list(hFILE_libcurl *fp) {
    const hdrlist *first = (fp->headers.fixed.num > 0)
        ? &fp->headers.fixed
        : &fp->headers.extra;

    return (first->num > 0) ? &first->list[0] : NULL;
}
348 
// Return 1 if hdr starts with "authorization:" (compared without regard
// to case), 0 otherwise.
static inline int is_authorization(const char *hdr) {
    static const char prefix[] = "authorization:";

    return strncasecmp(prefix, hdr, sizeof(prefix) - 1) == 0;
}
352 
/*
 * Refresh the "extra" header list from the user's header callback.
 *
 * The callback hands back a NULL-terminated array of header strings.  The
 * strings are stored in the extra list without being copied, and each
 * array entry is set to NULL once this function has dealt with it.  If the
 * callback leaves the array pointer NULL the existing headers are kept.
 *
 * Returns 0 on success (including when there is no callback) and -1 if
 * the callback fails or memory runs out.
 */
static int add_callback_headers(hFILE_libcurl *fp) {
    char **hdrs = NULL, **hdr, **taken;

    if (!fp->headers.callback)
        return 0;

    // Get the headers from the callback
    if (fp->headers.callback(fp->headers.callback_data, &hdrs) != 0) {
        return -1;
    }

    if (!hdrs) // No change
        return 0;

    // Remove any old callback headers
    if (fp->headers.fixed.num > 0) {
        // Unlink lists
        fp->headers.fixed.list[fp->headers.fixed.num - 1].next = NULL;
    }
    free_headers(&fp->headers.extra, 0);

    if (fp->headers.auth_hdr_num > 0 || fp->headers.auth_hdr_num == -2)
        fp->headers.auth_hdr_num = 0; // Just removed it...

    // Convert to libcurl-suitable form
    for (hdr = hdrs; *hdr; hdr++) {
        if (append_header(&fp->headers.extra, *hdr, 0) < 0) {
            goto cleanup;
        }
        if (is_authorization(*hdr) && !fp->headers.auth_hdr_num)
            fp->headers.auth_hdr_num = -2;
    }
    // The strings now belong to the extra list
    for (hdr = hdrs; *hdr; hdr++) *hdr = NULL;

    if (fp->headers.fixed.num > 0 && fp->headers.extra.num > 0) {
        // Relink lists
        fp->headers.fixed.list[fp->headers.fixed.num - 1].next
            = &fp->headers.extra.list[0];
    }
    return 0;

 cleanup:
    // Entries before hdr are already owned by the extra list, so just
    // forget them here, as the success path does.
    for (taken = hdrs; taken < hdr; taken++) *taken = NULL;
    // The entry that failed and everything after it were never stored;
    // free each of them in turn.
    for (; *hdr; hdr++) {
        free(*hdr);
        *hdr = NULL;
    }
    return -1;
}
401 
402 /*
403  * Read an OAUTH2-style Bearer access token (see
404  * https://tools.ietf.org/html/rfc6750#section-4).
405  * Returns 'v' for valid; 'i' for invalid (token missing or wrong sort);
406  * '?' for a JSON parse error; 'm' if it runs out of memory.
407  */
read_auth_json(auth_token * tok,hFILE * auth_fp)408 static int read_auth_json(auth_token *tok, hFILE *auth_fp) {
409     hts_json_token *t = hts_json_alloc_token();
410     kstring_t str = {0, 0, NULL};
411     char *token = NULL, *type = NULL, *expiry = NULL;
412     int ret = 'i';
413 
414     if (!t) goto error;
415 
416     if ((ret = hts_json_fnext(auth_fp, t, &str)) != '{') goto error;
417     while (hts_json_fnext(auth_fp, t, &str) != '}') {
418         char *key;
419         if (hts_json_token_type(t) != 's') {
420             ret = '?';
421             goto error;
422         }
423         key = hts_json_token_str(t);
424         if (!key) goto error;
425         if (strcmp(key, "access_token") == 0) {
426             if ((ret = hts_json_fnext(auth_fp, t, &str)) != 's') goto error;
427             token = ks_release(&str);
428         } else if (strcmp(key, "token_type") == 0) {
429             if ((ret = hts_json_fnext(auth_fp, t, &str)) != 's') goto error;
430             type = ks_release(&str);
431         } else if (strcmp(key, "expires_in") == 0) {
432             if ((ret = hts_json_fnext(auth_fp, t, &str)) != 'n') goto error;
433             expiry = ks_release(&str);
434         } else if (hts_json_fskip_value(auth_fp, '\0') != 'v') {
435             ret = '?';
436             goto error;
437         }
438     }
439 
440     if (!token || (type && strcmp(type, "Bearer") != 0)) {
441         ret = 'i';
442         goto error;
443     }
444 
445     ret = 'm';
446     str.l = 0;
447     if (kputs("Authorization: Bearer ", &str) < 0) goto error;
448     if (kputs(token, &str) < 0) goto error;
449     free(tok->token);
450     tok->token = ks_release(&str);
451     if (expiry) {
452         long exp = strtol(expiry, NULL, 10);
453         if (exp < 0) exp = 0;
454         tok->expiry = time(NULL) + exp;
455     } else {
456         tok->expiry = 0;
457     }
458     ret = 'v';
459 
460  error:
461     free(token);
462     free(type);
463     free(expiry);
464     free(str.s);
465     hts_json_free_token(t);
466     return ret;
467 }
468 
/*
 * Read a plain-text token: the first whitespace-delimited word on the
 * first line of auth_fp.
 *
 * On success tok->token is replaced by an "Authorization: Bearer <word>"
 * header line (or by NULL if the line holds no word) and tok->expiry is
 * set to 0.  Returns 0 on success, -1 if the line cannot be read or
 * memory runs out.
 */
static int read_auth_plain(auth_token *tok, hFILE *auth_fp) {
    kstring_t text = {0, 0, NULL};
    kstring_t header = {0, 0, NULL};
    const char *word, *word_end;
    int result = -1;

    if (kgetline(&text, (char * (*)(char *, int, void *)) hgets, auth_fp) < 0)
        goto done;
    if (kputc('\0', &text) < 0)
        goto done;

    // Find the first run of non-space characters
    word = text.s;
    while (*word && isspace_c(*word))
        word++;
    word_end = word;
    while (*word_end && !isspace_c(*word_end))
        word_end++;

    if (word_end > word
        && (kputs("Authorization: Bearer ", &header) < 0
            || kputsn(word, word_end - word, &header) < 0))
        goto done;

    free(tok->token);
    tok->token = ks_release(&header);
    tok->expiry = 0;
    result = 0;

 done:
    free(text.s);
    free(header.s);  // NULL again once the token has taken it over
    return result;
}
496 
/*
 * Re-read an auth token from tok->path if it is about to expire.
 *
 * *changed is set to 1 if a renewal was attempted (so tok->token may have
 * been replaced or removed), and to 0 if the token was left alone.
 * Returns 0 on success and -1 on failure.  A failed read or parse is
 * recorded in tok->failed, which makes later renewals fail immediately.
 * A token source that does not exist is not an error: the token is
 * dropped and never looked for again.
 */
static int renew_auth_token(auth_token *tok, int *changed) {
    hFILE *auth_fp = NULL;
    char buffer[16];
    ssize_t len;

    *changed = 0;
    if (tok->expiry == 0 || time(NULL) + AUTH_REFRESH_EARLY_SECS < tok->expiry)
        return 0; // Still valid

    if (tok->failed)
        return -1;

    *changed = 1;
    auth_fp = hopen(tok->path, "rR");
    if (!auth_fp) {
        // Not worried about missing files; other errors are bad.
        if (errno != ENOENT)
            goto fail;

        tok->expiry = 0; // Prevent retry
        free(tok->token); // Just in case it was set
        tok->token = NULL; // Callers test this pointer, so don't leave
                           // it dangling
        return 0;
    }

    len = hpeek(auth_fp, buffer, sizeof(buffer));
    if (len < 0)
        goto fail;

    // A '{' among the first few bytes means JSON; otherwise plain text
    if (memchr(buffer, '{', len) != NULL) {
        if (read_auth_json(tok, auth_fp) != 'v')
            goto fail;
    } else {
        if (read_auth_plain(tok, auth_fp) < 0)
            goto fail;
    }

    return hclose(auth_fp) < 0 ? -1 : 0;

 fail:
    tok->failed = 1;
    if (auth_fp) hclose_abruptly(auth_fp);
    return -1;
}
540 
/*
 * Make sure the extra header list carries an up-to-date Authorization
 * header taken from fp->headers.auth.
 *
 * Does nothing if an Authorization header was supplied some other way
 * (auth_hdr_num < 0) or if there is no token.  Otherwise the token is
 * renewed if necessary and the header is added, replaced or removed to
 * match.  fp->headers.auth_hdr_num is kept as the 1-based position of the
 * header in the extra list, or 0 if there is none.
 *
 * Returns 0 on success, -1 if renewal fails or memory runs out.
 */
static int add_auth_header(hFILE_libcurl *fp) {
    int changed = 0;

    if (fp->headers.auth_hdr_num < 0)
        return 0; // Have an Authorization header from open or header callback

    if (!fp->headers.auth)
        return 0; // Nothing to add

    pthread_mutex_lock(&fp->headers.auth->lock);
    if (renew_auth_token(fp->headers.auth, &changed) < 0)
        goto unlock_fail;

    if (!changed && fp->headers.auth_hdr_num > 0) {
        // Token untouched and its header is already in place
        pthread_mutex_unlock(&fp->headers.auth->lock);
        return 0;
    }

    if (fp->headers.auth_hdr_num > 0) {
        // Had a previous header, so swap in the new one
        char *header = fp->headers.auth->token;
        char *header_copy = header ? strdup(header) : NULL;
        int idx = fp->headers.auth_hdr_num - 1;
        if (header && !header_copy)
            goto unlock_fail;

        if (header_copy) {
            free(fp->headers.extra.list[idx].data);
            fp->headers.extra.list[idx].data = header_copy;
        } else {
            unsigned int j;
            // More complicated case - need to get rid of the old header
            // and tidy up linked lists
            free(fp->headers.extra.list[idx].data);
            for (j = idx + 1; j < fp->headers.extra.num; j++) {
                fp->headers.extra.list[j - 1] = fp->headers.extra.list[j];
                fp->headers.extra.list[j - 1].next = &fp->headers.extra.list[j];
            }
            fp->headers.extra.num--;
            if (fp->headers.extra.num > 0) {
                fp->headers.extra.list[fp->headers.extra.num-1].next = NULL;
            } else if (fp->headers.fixed.num > 0) {
                fp->headers.fixed.list[fp->headers.fixed.num - 1].next = NULL;
            }
            fp->headers.auth_hdr_num = 0;
        }
    } else if (fp->headers.auth->token) {
        // Add new header and remember where it is
        int res = append_header(&fp->headers.extra,
                                fp->headers.auth->token, 1);

        // The extra list may have just gained its first node, or its node
        // array may have moved.  Re-point the tail of the fixed list at it
        // so the two still form one chain.  This is done even if the
        // append failed, because the array can move before the failure.
        if (fp->headers.fixed.num > 0) {
            fp->headers.fixed.list[fp->headers.fixed.num - 1].next
                = (fp->headers.extra.num > 0
                   ? &fp->headers.extra.list[0] : NULL);
        }
        if (res < 0)
            goto unlock_fail;
        fp->headers.auth_hdr_num = fp->headers.extra.num;
    }

    pthread_mutex_unlock(&fp->headers.auth->lock);
    return 0;

 unlock_fail:
    pthread_mutex_unlock(&fp->headers.auth->lock);
    return -1;
}
603 
/*
 * Find (or create) the shared auth token for url and add its header to fp.
 *
 * The token location is curl.auth_path with every "%h" replaced by the
 * host part of url.  Tokens are cached in curl.auth_map under that
 * location, so handles that resolve to the same location share one token.
 *
 * Returns 0 on success, or when there is nothing to do; -1 on failure.
 * If a new cache entry cannot be allocated, fp is simply left without a
 * token.
 */
static int get_auth_token(hFILE_libcurl *fp, const char *url) {
    const char *host = NULL, *p, *q;
    kstring_t name = {0, 0, NULL};
    size_t host_len = 0;
    khiter_t idx;
    auth_token *tok = NULL;

    // Nothing to do if:
    //   curl.auth_path has not been set
    //   fp was made by hfile_libcurl (e.g. auth_path is a http:// url)
    //   we already have an Authorization header
    if (!curl.auth_path || fp->is_recursive || fp->headers.auth_hdr_num != 0)
        return 0;

    // Insist on having a secure connection unless the user insists harder
    if (!curl.allow_unencrypted_auth_header && strncmp(url, "https://", 8) != 0)
        return 0;

    host = strstr(url, "://");
    if (host) {
        host += 3;
        host_len = strcspn(host, "/");
    }

    // Expand each "%h" in the template to the host name
    p = curl.auth_path;
    while ((q = strstr(p, "%h")) != NULL) {
        if (q - p > INT_MAX || host_len > INT_MAX) goto error;
        if (kputsn_(p, q - p, &name) < 0) goto error;
        // host is NULL when url has no "://"; don't pass that on
        if (host_len > 0 && kputsn_(host, host_len, &name) < 0) goto error;
        p = q + 2;
    }
    if (kputs(p, &name) < 0) goto error;

    pthread_mutex_lock(&curl.auth_lock);
    idx = kh_get(auth_map, curl.auth_map, name.s);
    if (idx < kh_end(curl.auth_map)) {
        tok = kh_value(curl.auth_map, idx);
    } else {
        tok = calloc(1, sizeof(*tok));
        if (tok && pthread_mutex_init(&tok->lock, NULL) != 0) {
            free(tok);
            tok = NULL;
        }
        if (tok) {
            int ret = -1;
            tok->path = ks_release(&name);
            tok->token = NULL;
            tok->expiry = 1; // Force refresh
            idx = kh_put(auth_map, curl.auth_map, tok->path, &ret);
            if (ret < 0) {
                // Insertion failed, so idx is not a usable slot and must
                // not be written through
                free_auth(tok);
                tok = NULL;
            } else {
                kh_value(curl.auth_map, idx) = tok;
            }
        }
    }
    pthread_mutex_unlock(&curl.auth_lock);

    fp->headers.auth = tok;
    free(name.s);

    return add_auth_header(fp);

 error:
    free(name.s);
    return -1;
}
671 
// Drain libcurl's message queue for fp.  A "done" message marks the
// transfer as finished and records its result code; every other message
// is ignored.
static void process_messages(hFILE_libcurl *fp)
{
    int queued;
    CURLMsg *m;

    for (m = curl_multi_info_read(fp->multi, &queued);
         m != NULL;
         m = curl_multi_info_read(fp->multi, &queued)) {
        if (m->msg != CURLMSG_DONE)
            continue;
        fp->finished = 1;
        fp->final_result = m->data.result;
    }
}
689 
/*
 * Wait until libcurl has something to do (unless it has already asked to
 * be called again straight away), then let it make progress on the
 * transfer.
 *
 * Returns 0 on success.  Returns -1 if select() fails, or if
 * curl_multi_perform() reports an error, in which case errno is set from
 * that error.
 */
static int wait_perform(hFILE_libcurl *fp)
{
    CURLMcode errm;
    int nrunning;

    if (!fp->perform_again) {
        fd_set rd, wr, ex;
        int maxfd;
        long timeout;  // milliseconds

        FD_ZERO(&rd);
        FD_ZERO(&wr);
        FD_ZERO(&ex);

        if (curl_multi_fdset(fp->multi, &rd, &wr, &ex, &maxfd) != CURLM_OK) {
            maxfd = -1;
            timeout = 1000;
        } else if (curl_multi_timeout(fp->multi, &timeout) != CURLM_OK) {
            timeout = 1000;
        } else if (timeout < 0) {
            timeout = 10000;  // as recommended by curl_multi_timeout(3)
        }

        if (maxfd < 0) {
            // No descriptors to wait on, so just sleep for a short while
            if (timeout > 100)
                timeout = 100; // as recommended by curl_multi_fdset(3)
#ifdef _WIN32
            /* Windows ignores the first argument of select, so calling
             * select with maxfd=-1 does not sleep for timeout milliseconds
             * as the block below expects.  Sleep here instead and skip
             * that block.
             */
            Sleep(timeout);
            timeout = 0;
#endif
        }

        if (timeout > 0) {
            struct timeval tval;
            tval.tv_sec  = timeout / 1000;
            tval.tv_usec = (timeout % 1000) * 1000;

            if (select(maxfd + 1, &rd, &wr, &ex, &tval) < 0)
                return -1;
        }
    }

    errm = curl_multi_perform(fp->multi, &nrunning);
    if (errm == CURLM_CALL_MULTI_PERFORM) {
        fp->perform_again = 1;
    } else {
        fp->perform_again = 0;
        if (errm != CURLM_OK) {
            errno = multi_errno(errm);
            return -1;
        }
    }

    // Fewer transfers running than expected means one has completed
    if (nrunning < fp->nrunning)
        process_messages(fp);
    return 0;
}
741 
742 
recv_callback(char * ptr,size_t size,size_t nmemb,void * fpv)743 static size_t recv_callback(char *ptr, size_t size, size_t nmemb, void *fpv)
744 {
745     hFILE_libcurl *fp = (hFILE_libcurl *) fpv;
746     size_t n = size * nmemb;
747 
748     if (n > fp->buffer.len) {
749         fp->paused = 1;
750         return CURL_WRITEFUNC_PAUSE;
751     }
752     else if (n == 0) return 0;
753 
754     memcpy(fp->buffer.ptr.rd, ptr, n);
755     fp->buffer.ptr.rd += n;
756     fp->buffer.len -= n;
757     return n;
758 }
759 
760 
header_callback(void * contents,size_t size,size_t nmemb,void * userp)761 static size_t header_callback(void *contents, size_t size, size_t nmemb,
762                               void *userp)
763 {
764     size_t realsize = size * nmemb;
765     kstring_t *resp = (kstring_t *)userp;
766 
767     if (kputsn((const char *)contents, realsize, resp) == EOF) {
768         return 0;
769     }
770 
771     return realsize;
772 }
773 
774 
/*
 * hFILE read method: read up to nbytes bytes into bufferv.
 *
 * A seek recorded in fp->delayed_seek is carried out first, in the
 * cheapest way available:
 *  - if the target lies inside the buffer content saved when the seek was
 *    requested, the data is copied from there;
 *  - if it is a short distance ahead of the current stream position, the
 *    intervening data is read and thrown away;
 *  - otherwise the transfer is restarted at the new position.
 *
 * Returns the number of bytes stored in bufferv (0 if the transfer ended
 * without supplying any), or -1 on error with errno set.
 */
static ssize_t libcurl_read(hFILE *fpv, void *bufferv, size_t nbytes)
{
    hFILE_libcurl *fp = (hFILE_libcurl *) fpv;
    char *buffer = (char *) bufferv;
    off_t to_skip = -1;  // Bytes still to discard; negative if not skipping
    ssize_t got = 0;
    CURLcode err;

    if (fp->delayed_seek >= 0) {
        assert(fp->base.offset == fp->delayed_seek);

        if (fp->preserved
            && fp->last_offset > fp->delayed_seek
            && fp->last_offset - fp->preserved_bytes <= fp->delayed_seek) {
            // Can use buffer contents copied when seeking started, to
            // avoid having to re-read data discarded by hseek().
            // Note fp->last_offset is the offset of the *end* of the
            // preserved buffer.
            size_t n = fp->last_offset - fp->delayed_seek;
            char *start = fp->preserved + (fp->preserved_bytes - n);
            size_t bytes = n <= nbytes ? n : nbytes;
            memcpy(buffer, start, bytes);
            if (bytes < n) { // Part of the preserved buffer still left
                fp->delayed_seek += bytes;
            } else {
                fp->last_offset = fp->delayed_seek = -1;
            }
            return bytes;
        }

        if (fp->last_offset >= 0
            && fp->delayed_seek > fp->last_offset
            && fp->delayed_seek - fp->last_offset < MIN_SEEK_FORWARD) {
            // If not seeking far, just read the data and throw it away.  This
            // is likely to be quicker than opening a new stream
            to_skip = fp->delayed_seek - fp->last_offset;
        } else {
            if (restart_from_position(fp, fp->delayed_seek) < 0) {
                return -1;
            }
        }
        fp->delayed_seek = -1;
        fp->last_offset = -1;
        fp->preserved_bytes = 0;
    }

    do {
        // Hand the caller's buffer to recv_callback() and let the
        // transfer run until it is full or the transfer ends
        fp->buffer.ptr.rd = buffer;
        fp->buffer.len = nbytes;
        fp->paused = 0;
        if (!fp->finished) {
            err = curl_easy_pause(fp->easy, CURLPAUSE_CONT);
            if (err != CURLE_OK) {
                errno = easy_errno(fp->easy, err);
                return -1;
            }
        }

        while (! fp->paused && ! fp->finished) {
            if (wait_perform(fp) < 0) return -1;
        }

        got = fp->buffer.ptr.rd - buffer;

        if (to_skip >= 0) { // Skipping over a small seek
            if (got <= to_skip) {
                // All of this data is before the seek target: discard it.
                // Clearing got matters if the transfer ends here, as the
                // discarded bytes must not be returned to the caller.
                to_skip -= got;
                got = 0;
            } else {
                // Enough was skipped; return what is left over
                got -= to_skip;
                memmove(buffer, buffer + to_skip, got);
                to_skip = -1;
            }
        }
    } while (to_skip >= 0 && ! fp->finished);
    fp->buffer.ptr.rd = NULL;
    fp->buffer.len = 0;

    if (fp->finished && fp->final_result != CURLE_OK) {
        errno = easy_errno(fp->easy, fp->final_result);
        return -1;
    }

    return got;
}
861 
send_callback(char * ptr,size_t size,size_t nmemb,void * fpv)862 static size_t send_callback(char *ptr, size_t size, size_t nmemb, void *fpv)
863 {
864     hFILE_libcurl *fp = (hFILE_libcurl *) fpv;
865     size_t n = size * nmemb;
866 
867     if (fp->buffer.len == 0) {
868         // Send buffer is empty; normally pause, or signal EOF if we're closing
869         if (fp->closing) return 0;
870         else { fp->paused = 1; return CURL_READFUNC_PAUSE; }
871     }
872 
873     if (n > fp->buffer.len) n = fp->buffer.len;
874     memcpy(ptr, fp->buffer.ptr.wr, n);
875     fp->buffer.ptr.wr += n;
876     fp->buffer.len -= n;
877     return n;
878 }
879 
/*
 * hFILE write method: offer nbytes bytes from bufferv for upload and run
 * the transfer until libcurl pauses again or the transfer finishes.
 * Returns the number of bytes taken, or -1 on error with errno set.
 */
static ssize_t libcurl_write(hFILE *fpv, const void *bufferv, size_t nbytes)
{
    hFILE_libcurl *fp = (hFILE_libcurl *) fpv;
    const char *const start = (const char *) bufferv;
    CURLcode rc;
    ssize_t sent;

    // Hand the caller's data to send_callback() and wake the transfer up
    fp->paused = 0;
    fp->buffer.len = nbytes;
    fp->buffer.ptr.wr = start;

    rc = curl_easy_pause(fp->easy, CURLPAUSE_CONT);
    if (rc != CURLE_OK) {
        errno = easy_errno(fp->easy, rc);
        return -1;
    }

    while (!(fp->paused || fp->finished)) {
        if (wait_perform(fp) < 0)
            return -1;
    }

    sent = fp->buffer.ptr.wr - start;
    fp->buffer.len = 0;
    fp->buffer.ptr.wr = NULL;

    if (fp->finished && fp->final_result != CURLE_OK) {
        errno = easy_errno(fp->easy, fp->final_result);
        return -1;
    }

    return sent;
}
906 
/*
 * Save the unread part of the hFILE buffer (base.begin up to base.end) in
 * fp->preserved, so that a later read can be served from it after a seek.
 *
 * If the buffer holds no unread data, or memory for the copy cannot be
 * obtained, fp->preserved_bytes is set to 0 so nothing stale can be used.
 */
static void preserve_buffer_content(hFILE_libcurl *fp)
{
    size_t capacity, unread;

    if (fp->base.begin == fp->base.end) {
        fp->preserved_bytes = 0;
        return;
    }

    capacity = fp->base.limit - fp->base.buffer;
    if (!fp->preserved || fp->preserved_size < capacity) {
        // The old content is not needed, so release the old buffer rather
        // than leaking it, then get one as big as the hFILE buffer
        free(fp->preserved);
        fp->preserved = malloc(capacity);
        if (!fp->preserved) {
            fp->preserved_size = 0;
            fp->preserved_bytes = 0;
            return;
        }
        fp->preserved_size = capacity;
    }

    unread = fp->base.end - fp->base.begin;
    assert(unread <= fp->preserved_size);

    memcpy(fp->preserved, fp->base.begin, unread);
    fp->preserved_bytes = unread;
}
926 
/* hFILE backend seek operation (read-mode handles only).

   SEEK_SET and SEEK_END (when the file size is known) are supported;
   SEEK_CUR fails with ENOSYS.  The first seek restarts the transfer
   immediately via restart_from_position(); once that has succeeded, later
   seeks are only recorded in fp->delayed_seek and returned straight away.

   Returns the new absolute position, or -1 with errno set. */
static off_t libcurl_seek(hFILE *fpv, off_t offset, int whence)
{
    hFILE_libcurl *fp = (hFILE_libcurl *) fpv;
    off_t base, target;
    int out_of_range;

    // Refuse when writing, or when an earlier seek attempt has failed
    if (!(fp->is_read && fp->can_seek)) {
        errno = ESPIPE;
        return -1;
    }

    if (whence == SEEK_SET) {
        base = 0;
    } else if (whence == SEEK_CUR) {
        errno = ENOSYS;
        return -1;
    } else if (whence == SEEK_END) {
        if (fp->file_size < 0) {
            errno = ESPIPE;
            return -1;
        }
        base = fp->file_size;
    } else {
        errno = EINVAL;
        return -1;
    }

    // Verify 0 <= base+offset <= file_size (when known) without overflowing
    if (offset < 0)
        out_of_range = base + offset < 0;
    else
        out_of_range = fp->file_size >= 0 && offset > fp->file_size - base;

    if (out_of_range) {
        errno = EINVAL;
        return -1;
    }

    target = base + offset;

    if (!fp->tried_seek) {
        // First seek on this handle: do it for real to find out if it works
        if (restart_from_position(fp, target) < 0) {
            /* ESPIPE may not be the whole truth, but it lets the caller
               carry on using the existing handle. */
            errno = ESPIPE;
            return -1;
        }

        fp->tried_seek = 1;
        return target;
    }

    /* Seeking is known to work, so postpone the reconnection until the next
       read.  A run of seeks with no reads in between then costs nothing. */
    if (fp->delayed_seek < 0) {
        fp->last_offset = fp->base.offset + (fp->base.end - fp->base.buffer);
        // Keep the current hFILE buffer content in case it is useful later
        preserve_buffer_content(fp);
    }
    fp->delayed_seek = target;
    return target;
}
987 
/* Re-issue the request so that the transfer starts at byte offset `pos`.

   A duplicate of fp->easy is driven through an on-stack copy of *fp
   (temp_fp) until it pauses or finishes.  Only if that works is the
   original easy handle removed and replaced by the duplicate.

   Returns 0 on success, -1 on failure.  Failures that reach the early_error
   label clear fp->can_seek so that no further seeks are attempted, and set
   errno when an error code was captured.  The direct `return -1` paths
   (header refresh failures, failure to detach the temporary handle, failure
   to re-point the new handle at fp) leave fp->can_seek untouched. */
static int restart_from_position(hFILE_libcurl *fp, off_t pos) {
    hFILE_libcurl temp_fp;
    CURLcode err;
    CURLMcode errm;
    int update_headers = 0;
    int save_errno = 0;

    // TODO If we seem to be doing random access, use CURLOPT_RANGE to do
    // limited reads (e.g. about a BAM block!) so seeking can reuse the
    // existing connection more often.

    // Get new headers from the callback (if defined).  This changes the
    // headers in fp before it gets duplicated, but they should have been
    // sent by now.

    if (fp->headers.callback) {
        if (add_callback_headers(fp) != 0)
            return -1;
        update_headers = 1;
    }
    if (fp->headers.auth_hdr_num > 0 && fp->headers.auth) {
        if (add_auth_header(fp) != 0)
            return -1;
        update_headers = 1;
    }
    // Install the refreshed header list on the original handle so that the
    // duplicate made below picks it up.
    if (update_headers) {
        struct curl_slist *list = get_header_list(fp);
        if (list) {
            err = curl_easy_setopt(fp->easy, CURLOPT_HTTPHEADER, list);
            if (err != CURLE_OK) {
                errno = easy_errno(fp->easy,err);
                return -1;
            }
        }
    }

    /*
      Duplicate the easy handle, and use CURLOPT_RESUME_FROM_LARGE to open
      a new request to the server, reading from the location that we want
      to seek to.  If the new request works and returns the correct data,
      the original easy handle in *fp is closed and replaced with the new
      one.  If not, we close the new handle and leave *fp unchanged.
     */

    memcpy(&temp_fp, fp, sizeof(temp_fp));
    temp_fp.buffer.len = 0;
    temp_fp.buffer.ptr.rd = NULL;
    temp_fp.easy = curl_easy_duphandle(fp->easy);
    if (!temp_fp.easy)
        goto early_error;

    // Point the duplicate's private and write-callback data at the temporary
    // copy, so driving it does not disturb *fp.
    err = curl_easy_setopt(temp_fp.easy, CURLOPT_RESUME_FROM_LARGE,(curl_off_t)pos);
    err |= curl_easy_setopt(temp_fp.easy, CURLOPT_PRIVATE, &temp_fp);
    err |= curl_easy_setopt(temp_fp.easy, CURLOPT_WRITEDATA, &temp_fp);
    if (err != CURLE_OK) {
        save_errno = easy_errno(temp_fp.easy, err);
        goto error;
    }

    temp_fp.buffer.len = 0;  // Ensures we only read the response headers
    temp_fp.paused = temp_fp.finished = 0;

    // fp->multi and temp_fp.multi are the same.
    errm = curl_multi_add_handle(fp->multi, temp_fp.easy);
    if (errm != CURLM_OK) {
        save_errno = multi_errno(errm);
        goto error;
    }
    temp_fp.nrunning = ++fp->nrunning;

    // Drive the new request until it pauses or finishes.
    while (! temp_fp.paused && ! temp_fp.finished)
        if (wait_perform(&temp_fp) < 0) {
            save_errno = errno;
            goto error_remove;
        }

    if (temp_fp.finished && temp_fp.final_result != CURLE_OK) {
        save_errno = easy_errno(temp_fp.easy, temp_fp.final_result);
        goto error_remove;
    }

    // We've got a good response, close the original connection and
    // replace it with the new one.

    errm = curl_multi_remove_handle(fp->multi, fp->easy);
    if (errm != CURLM_OK) {
        // Clean up as much as possible
        curl_easy_reset(temp_fp.easy);
        if (curl_multi_remove_handle(fp->multi, temp_fp.easy) == CURLM_OK) {
            fp->nrunning--;
            curl_easy_cleanup(temp_fp.easy);
        }
        save_errno = multi_errno(errm);
        goto early_error;
    }
    fp->nrunning--;

    // Adopt the duplicate: re-point its callback data from the on-stack
    // temp_fp to fp, then copy across the transfer state it accumulated.
    curl_easy_cleanup(fp->easy);
    fp->easy = temp_fp.easy;
    err = curl_easy_setopt(fp->easy, CURLOPT_WRITEDATA, fp);
    err |= curl_easy_setopt(fp->easy, CURLOPT_PRIVATE, fp);
    if (err != CURLE_OK) {
        save_errno = easy_errno(fp->easy, err);
        curl_easy_reset(fp->easy);
        errno = save_errno;
        return -1;
    }
    fp->buffer.len = 0;
    fp->paused = temp_fp.paused;
    fp->finished = temp_fp.finished;
    fp->perform_again = temp_fp.perform_again;
    fp->final_result = temp_fp.final_result;

    return 0;

    // Cleanup ladder: each label undoes one more step and falls through
    // to the next.
 error_remove:
    curl_easy_reset(temp_fp.easy); // Ensure no pointers to on-stack temp_fp
    errm = curl_multi_remove_handle(fp->multi, temp_fp.easy);
    if (errm != CURLM_OK) {
        errno = multi_errno(errm);
        return -1;
    }
    fp->nrunning--;
 error:
    curl_easy_cleanup(temp_fp.easy);
 early_error:
    fp->can_seek = 0;  // Don't try to seek again
    if (save_errno)
        errno = save_errno;
    return -1;
}
1119 
/* hFILE backend close operation.  Gives an unfinished transfer one last
   chance to run (so that uploads can signal EOF through send_callback()),
   then tears down the curl handles and releases the header lists and the
   preserved buffer.

   Returns 0 on success, or -1 with errno set if any step reported an
   error.  The resources are released either way. */
static int libcurl_close(hFILE *fpv)
{
    hFILE_libcurl *fp = (hFILE_libcurl *) fpv;
    CURLcode rc;
    CURLMcode mrc;
    int close_errno = 0;

    // An empty buffer plus the closing flag makes send_callback() return 0
    fp->buffer.len = 0;
    fp->closing = 1;
    fp->paused = 0;
    if (!fp->finished) {
        rc = curl_easy_pause(fp->easy, CURLPAUSE_CONT);
        if (rc != CURLE_OK)
            close_errno = easy_errno(fp->easy, rc);
    }

    while (close_errno == 0 && !fp->paused && !fp->finished) {
        if (wait_perform(fp) < 0)
            close_errno = errno;
    }

    if (fp->finished && fp->final_result != CURLE_OK)
        close_errno = easy_errno(fp->easy, fp->final_result);

    mrc = curl_multi_remove_handle(fp->multi, fp->easy);
    if (close_errno == 0 && mrc != CURLM_OK)
        close_errno = multi_errno(mrc);
    fp->nrunning--;

    curl_easy_cleanup(fp->easy);
    curl_multi_cleanup(fp->multi);

    // A NULL second argument tells the callback to free whatever it holds
    if (fp->headers.callback)
        fp->headers.callback(fp->headers.callback_data, NULL);
    free_headers(&fp->headers.fixed, 1);
    free_headers(&fp->headers.extra, 1);

    free(fp->preserved);

    if (close_errno == 0)
        return 0;

    errno = close_errno;
    return -1;
}
1161 
// Backend operations table installed by libcurl_open() on the handles it
// returns.  The initializer is positional; the fourth slot is left NULL
// (presumably the flush operation -- confirm against struct hFILE_backend
// in hfile_internal.h).
static const struct hFILE_backend libcurl_backend =
{
    libcurl_read, libcurl_write, libcurl_seek, NULL, libcurl_close
};
1166 
1167 static hFILE *
libcurl_open(const char * url,const char * modes,http_headers * headers)1168 libcurl_open(const char *url, const char *modes, http_headers *headers)
1169 {
1170     hFILE_libcurl *fp;
1171     struct curl_slist *list;
1172     char mode;
1173     const char *s;
1174     CURLcode err;
1175     CURLMcode errm;
1176     int save, is_recursive;
1177     kstring_t in_header = {0, 0, NULL};
1178     long response;
1179 
1180     is_recursive = strchr(modes, 'R') != NULL;
1181 
1182     if ((s = strpbrk(modes, "rwa+")) != NULL) {
1183         mode = *s;
1184         if (strpbrk(&s[1], "rwa+")) mode = 'e';
1185     }
1186     else mode = '\0';
1187 
1188     if (mode != 'r' && mode != 'w') { errno = EINVAL; goto early_error; }
1189 
1190     fp = (hFILE_libcurl *) hfile_init(sizeof (hFILE_libcurl), modes, 0);
1191     if (fp == NULL) goto early_error;
1192 
1193     if (headers) {
1194         fp->headers = *headers;
1195     } else {
1196         memset(&fp->headers, 0, sizeof(fp->headers));
1197         fp->headers.fail_on_error = 1;
1198     }
1199 
1200     fp->file_size = -1;
1201     fp->buffer.ptr.rd = NULL;
1202     fp->buffer.len = 0;
1203     fp->final_result = (CURLcode) -1;
1204     fp->paused = fp->closing = fp->finished = fp->perform_again = 0;
1205     fp->can_seek = 1;
1206     fp->tried_seek = 0;
1207     fp->delayed_seek = fp->last_offset = -1;
1208     fp->preserved = NULL;
1209     fp->preserved_bytes = fp->preserved_size = 0;
1210     fp->is_recursive = is_recursive;
1211     fp->nrunning = 0;
1212     fp->easy = NULL;
1213 
1214     fp->multi = curl_multi_init();
1215     if (fp->multi == NULL) { errno = ENOMEM; goto error; }
1216 
1217     fp->easy = curl_easy_init();
1218     if (fp->easy == NULL) { errno = ENOMEM; goto error; }
1219 
1220     // Make a route to the hFILE_libcurl* given just a CURL* easy handle
1221     err = curl_easy_setopt(fp->easy, CURLOPT_PRIVATE, fp);
1222 
1223     // Avoid many repeated CWD calls with FTP, instead requesting the filename
1224     // by full path (as done in knet, but not strictly compliant with RFC1738).
1225     err |= curl_easy_setopt(fp->easy, CURLOPT_FTP_FILEMETHOD, CURLFTPMETHOD_NOCWD);
1226 
1227     if (mode == 'r') {
1228         err |= curl_easy_setopt(fp->easy, CURLOPT_WRITEFUNCTION, recv_callback);
1229         err |= curl_easy_setopt(fp->easy, CURLOPT_WRITEDATA, fp);
1230         fp->is_read = 1;
1231     }
1232     else {
1233         err |= curl_easy_setopt(fp->easy, CURLOPT_READFUNCTION, send_callback);
1234         err |= curl_easy_setopt(fp->easy, CURLOPT_READDATA, fp);
1235         err |= curl_easy_setopt(fp->easy, CURLOPT_UPLOAD, 1L);
1236         if (append_header(&fp->headers.fixed,
1237                           "Transfer-Encoding: chunked", 1) < 0)
1238             goto error;
1239         fp->is_read = 0;
1240     }
1241 
1242     err |= curl_easy_setopt(fp->easy, CURLOPT_SHARE, curl.share);
1243     err |= curl_easy_setopt(fp->easy, CURLOPT_URL, url);
1244     {
1245         char* env_curl_ca_bundle = getenv("CURL_CA_BUNDLE");
1246         if (env_curl_ca_bundle) {
1247             err |= curl_easy_setopt(fp->easy, CURLOPT_CAINFO, env_curl_ca_bundle);
1248         }
1249     }
1250     err |= curl_easy_setopt(fp->easy, CURLOPT_USERAGENT, curl.useragent.s);
1251     if (fp->headers.callback) {
1252         if (add_callback_headers(fp) != 0) goto error;
1253     }
1254     if (get_auth_token(fp, url) < 0)
1255         goto error;
1256     if ((list = get_header_list(fp)) != NULL)
1257         err |= curl_easy_setopt(fp->easy, CURLOPT_HTTPHEADER, list);
1258 
1259     if (hts_verbose <= 8 && fp->headers.fail_on_error)
1260         err |= curl_easy_setopt(fp->easy, CURLOPT_FAILONERROR, 1L);
1261     if (hts_verbose >= 8)
1262         err |= curl_easy_setopt(fp->easy, CURLOPT_VERBOSE, 1L);
1263 
1264     if (fp->headers.redirect) {
1265         err |= curl_easy_setopt(fp->easy, CURLOPT_HEADERFUNCTION, header_callback);
1266         err |= curl_easy_setopt(fp->easy, CURLOPT_HEADERDATA, (void *)&in_header);
1267     } else {
1268         err |= curl_easy_setopt(fp->easy, CURLOPT_FOLLOWLOCATION, 1L);
1269     }
1270 
1271     if (err != 0) { errno = ENOSYS; goto error; }
1272 
1273     errm = curl_multi_add_handle(fp->multi, fp->easy);
1274     if (errm != CURLM_OK) { errno = multi_errno(errm); goto error; }
1275     fp->nrunning++;
1276 
1277     while (! fp->paused && ! fp->finished) {
1278         if (wait_perform(fp) < 0) goto error_remove;
1279     }
1280 
1281     curl_easy_getinfo(fp->easy, CURLINFO_RESPONSE_CODE, &response);
1282     if (fp->headers.http_response_ptr) {
1283         *fp->headers.http_response_ptr = response;
1284     }
1285 
1286     if (fp->finished && fp->final_result != CURLE_OK) {
1287         errno = easy_errno(fp->easy, fp->final_result);
1288         goto error_remove;
1289     }
1290 
1291     if (fp->headers.redirect) {
1292         if (response >= 300 && response < 400) { // redirection
1293             kstring_t new_url = {0, 0, NULL};
1294 
1295             if (fp->headers.redirect(fp->headers.redirect_data, response,
1296                                      &in_header, &new_url)) {
1297                 errno = ENOSYS;
1298                 goto error;
1299             }
1300 
1301             err |= curl_easy_setopt(fp->easy, CURLOPT_URL, new_url.s);
1302             err |= curl_easy_setopt(fp->easy, CURLOPT_HEADERFUNCTION, NULL);
1303             err |= curl_easy_setopt(fp->easy, CURLOPT_HEADERDATA, NULL);
1304             free(ks_release(&in_header));
1305 
1306             if (err != 0) { errno = ENOSYS; goto error; }
1307             free(ks_release(&new_url));
1308 
1309             if (restart_from_position(fp, 0) < 0) {
1310                 goto error_remove;
1311             }
1312 
1313             if (fp->headers.http_response_ptr) {
1314                 curl_easy_getinfo(fp->easy, CURLINFO_RESPONSE_CODE,
1315                                   fp->headers.http_response_ptr);
1316             }
1317 
1318             if (fp->finished && fp->final_result != CURLE_OK) {
1319                 errno = easy_errno(fp->easy, fp->final_result);
1320                 goto error_remove;
1321             }
1322         } else {
1323             // we no longer need to look at the headers
1324             err |= curl_easy_setopt(fp->easy, CURLOPT_HEADERFUNCTION, NULL);
1325             err |= curl_easy_setopt(fp->easy, CURLOPT_HEADERDATA, NULL);
1326             free(ks_release(&in_header));
1327 
1328             if (err != 0) { errno = ENOSYS; goto error; }
1329         }
1330     }
1331 
1332     if (mode == 'r') {
1333         double dval;
1334 
1335         if (curl_easy_getinfo(fp->easy, CURLINFO_CONTENT_LENGTH_DOWNLOAD,
1336                               &dval) == CURLE_OK && dval >= 0.0)
1337             fp->file_size = (off_t) (dval + 0.1);
1338     }
1339 
1340     fp->base.backend = &libcurl_backend;
1341     return &fp->base;
1342 
1343 error_remove:
1344     save = errno;
1345     (void) curl_multi_remove_handle(fp->multi, fp->easy);
1346     fp->nrunning--;
1347     errno = save;
1348 
1349 error:
1350     if (fp->headers.redirect) free(in_header.s);
1351     save = errno;
1352     if (fp->easy) curl_easy_cleanup(fp->easy);
1353     if (fp->multi) curl_multi_cleanup(fp->multi);
1354     free_headers(&fp->headers.extra, 1);
1355     hfile_destroy((hFILE *) fp);
1356     errno = save;
1357     return NULL;
1358 
1359 early_error:
1360     return NULL;
1361 }
1362 
hopen_libcurl(const char * url,const char * modes)1363 static hFILE *hopen_libcurl(const char *url, const char *modes)
1364 {
1365     return libcurl_open(url, modes, NULL);
1366 }
1367 
/* Append `hdr` to the fixed header list and, if it is an Authorization
   header, set headers->auth_hdr_num to -1.
   Returns 0 on success, -1 if the header could not be appended. */
static int parse_va_fixed_header(http_headers *headers, const char *hdr)
{
    if (append_header(&headers->fixed, hdr, 1) < 0)
        return -1;
    if (is_authorization(hdr))
        headers->auth_hdr_num = -1;
    return 0;
}

/* Parse the extra hopen() arguments in `args` into `headers`.

   The arguments are a sequence of (keyword, value...) groups ended by a NULL
   keyword.  Recognised keywords are "httphdr", "httphdr:l", "httphdr:v",
   "httphdr_callback", "httphdr_callback_data", "va_list" (a nested va_list
   parsed recursively), "auth_token_enabled", "redirect_callback",
   "redirect_callback_data", "http_response_ptr" and "fail_on_error".

   NULL values for "httphdr", "httphdr:v", "va_list" and "auth_token_enabled"
   are ignored rather than dereferenced.

   Returns 0 on success.  Returns -1 if a header cannot be appended, or with
   errno set to EINVAL if a keyword is not recognised. */
static int parse_va_list(http_headers *headers, va_list args)
{
    const char *argtype;

    while ((argtype = va_arg(args, const char *)) != NULL) {
        if (strcmp(argtype, "httphdr:v") == 0) {
            // NULL-terminated array of header strings
            const char **hdr = va_arg(args, const char **);
            for (; hdr && *hdr; hdr++) {
                if (parse_va_fixed_header(headers, *hdr) < 0)
                    return -1;
            }
        }
        else if (strcmp(argtype, "httphdr:l") == 0) {
            // Header strings inline in the argument list, ended by NULL
            const char *hdr;
            while ((hdr = va_arg(args, const char *)) != NULL) {
                if (parse_va_fixed_header(headers, hdr) < 0)
                    return -1;
            }
        }
        else if (strcmp(argtype, "httphdr") == 0) {
            const char *hdr = va_arg(args, const char *);
            if (hdr && parse_va_fixed_header(headers, hdr) < 0)
                return -1;
        }
        else if (strcmp(argtype, "httphdr_callback") == 0) {
            headers->callback = va_arg(args, const hts_httphdr_callback);
        }
        else if (strcmp(argtype, "httphdr_callback_data") == 0) {
            headers->callback_data = va_arg(args, void *);
        }
        else if (strcmp(argtype, "va_list") == 0) {
            va_list *args2 = va_arg(args, va_list *);
            if (args2) {
                if (parse_va_list(headers, *args2) < 0) return -1;
            }
        }
        else if (strcmp(argtype, "auth_token_enabled") == 0) {
            const char *flag = va_arg(args, const char *);
            // Guard against a NULL flag: strcmp() on NULL is undefined
            if (flag && strcmp(flag, "false") == 0)
                headers->auth_hdr_num = -3;
        }
        else if (strcmp(argtype, "redirect_callback") == 0) {
            headers->redirect = va_arg(args, const redirect_callback);
        }
        else if (strcmp(argtype, "redirect_callback_data") == 0) {
            headers->redirect_data = va_arg(args, void *);
        }
        else if (strcmp(argtype, "http_response_ptr") == 0) {
            headers->http_response_ptr = va_arg(args, long *);
        }
        else if (strcmp(argtype, "fail_on_error") == 0) {
            headers->fail_on_error = va_arg(args, int);
        }
        else { errno = EINVAL; return -1; }
    }

    return 0;
}
1433 
1434 /*
1435   HTTP headers to be added to the request can be passed in as extra
1436   arguments to hopen().  The headers can be specified as follows:
1437 
1438   * Single header:
1439     hopen(url, mode, "httphdr", "X-Hdr-1: text", NULL);
1440 
1441   * Multiple headers in the argument list:
1442     hopen(url, mode, "httphdr:l", "X-Hdr-1: text", "X-Hdr-2: text", NULL, NULL);
1443 
1444   * Multiple headers in a char* array:
1445     hopen(url, mode, "httphdr:v", hdrs, NULL);
1446     where `hdrs` is a char **.  The list ends with a NULL pointer.
1447 
1448   * A callback function
1449     hopen(url, mode, "httphdr_callback", func,
1450                      "httphdr_callback_data", arg, NULL);
1451     `func` has type
1452          int (* hts_httphdr_callback) (void *cb_data, char ***hdrs);
1453     `arg` is passed to the callback as a void *.
1454 
1455     The function is called at file open, and when attempting to seek (which
1456     opens a new HTTP request).  This allows, for example, access tokens
1457     that may have gone stale to be regenerated.  The function is also
1458     called (with `hdrs` == NULL) on file close so that the callback can
1459     free any memory that it needs to.
1460 
1461     The callback should return 0 on success, non-zero on failure.  It should
1462     return in *hdrs a list of strings containing the new headers (terminated
1463     with a NULL pointer).  These will replace any headers previously supplied
1464     by the callback.  If no changes are necessary, it can return NULL
1465     in *hdrs, in which case the previous headers will be left unchanged.
1466 
1467     Ownership of the strings in the header list passes to hfile_libcurl,
1468     so the callback should not attempt to use or free them itself.  The memory
1469     containing the array belongs to the callback and will not be freed by
1470     hfile_libcurl.
1471 
1472     Headers supplied by the callback are appended after any specified
1473     using the "httphdr", "httphdr:l" or "httphdr:v" methods.  No attempt
1474     is made to replace these headers (even if a key is repeated) so anything
1475     that is expected to vary needs to come from the callback.
1476  */
1477 
vhopen_libcurl(const char * url,const char * modes,va_list args)1478 static hFILE *vhopen_libcurl(const char *url, const char *modes, va_list args)
1479 {
1480     hFILE *fp = NULL;
1481     http_headers headers = { .fail_on_error = 1 };
1482 
1483     if (parse_va_list(&headers, args) == 0) {
1484         fp = libcurl_open(url, modes, &headers);
1485     }
1486 
1487     if (!fp) {
1488         free_headers(&headers.fixed, 1);
1489     }
1490     return fp;
1491 }
1492 
PLUGIN_GLOBAL(hfile_plugin_init,_libcurl)1493 int PLUGIN_GLOBAL(hfile_plugin_init,_libcurl)(struct hFILE_plugin *self)
1494 {
1495     static const struct hFILE_scheme_handler handler =
1496         { hopen_libcurl, hfile_always_remote, "libcurl",
1497           2000 + 50,
1498           vhopen_libcurl };
1499 
1500 #ifdef ENABLE_PLUGINS
1501     // Embed version string for examination via strings(1) or what(1)
1502     static const char id[] =
1503         "@(#)hfile_libcurl plugin (htslib)\t" HTS_VERSION_TEXT;
1504     const char *version = strchr(id, '\t')+1;
1505 #else
1506     const char *version = hts_version();
1507 #endif
1508     const curl_version_info_data *info;
1509     const char * const *protocol;
1510     const char *auth;
1511     CURLcode err;
1512     CURLSHcode errsh;
1513 
1514     err = curl_global_init(CURL_GLOBAL_ALL);
1515     if (err != CURLE_OK) { errno = easy_errno(NULL, err); return -1; }
1516 
1517     curl.share = curl_share_init();
1518     if (curl.share == NULL) { curl_global_cleanup(); errno = EIO; return -1; }
1519     errsh = curl_share_setopt(curl.share, CURLSHOPT_LOCKFUNC, share_lock);
1520     errsh |= curl_share_setopt(curl.share, CURLSHOPT_UNLOCKFUNC, share_unlock);
1521     errsh |= curl_share_setopt(curl.share, CURLSHOPT_SHARE, CURL_LOCK_DATA_DNS);
1522     if (errsh != 0) {
1523         curl_share_cleanup(curl.share);
1524         curl_global_cleanup();
1525         errno = EIO;
1526         return -1;
1527     }
1528 
1529     if ((auth = getenv("HTS_AUTH_LOCATION")) != NULL) {
1530         curl.auth_path = strdup(auth);
1531         curl.auth_map = kh_init(auth_map);
1532         if (!curl.auth_path || !curl.auth_map) {
1533             int save_errno = errno;
1534             free(curl.auth_path);
1535             kh_destroy(auth_map, curl.auth_map);
1536             curl_share_cleanup(curl.share);
1537             curl_global_cleanup();
1538             errno = save_errno;
1539             return -1;
1540         }
1541     }
1542     if ((auth = getenv("HTS_ALLOW_UNENCRYPTED_AUTHORIZATION_HEADER")) != NULL
1543         && strcmp(auth, "I understand the risks") == 0) {
1544         curl.allow_unencrypted_auth_header = 1;
1545     }
1546 
1547     info = curl_version_info(CURLVERSION_NOW);
1548     ksprintf(&curl.useragent, "htslib/%s libcurl/%s", version, info->version);
1549 
1550     self->name = "libcurl";
1551     self->destroy = libcurl_exit;
1552 
1553     for (protocol = info->protocols; *protocol; protocol++)
1554         hfile_add_scheme_handler(*protocol, &handler);
1555     return 0;
1556 }
1557