1 /*  hfile.c -- buffered low-level input/output streams.
2 
3     Copyright (C) 2013-2016 Genome Research Ltd.
4 
5     Author: John Marshall <jm18@sanger.ac.uk>
6 
7 Permission is hereby granted, free of charge, to any person obtaining a copy
8 of this software and associated documentation files (the "Software"), to deal
9 in the Software without restriction, including without limitation the rights
10 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 copies of the Software, and to permit persons to whom the Software is
12 furnished to do so, subject to the following conditions:
13 
14 The above copyright notice and this permission notice shall be included in
15 all copies or substantial portions of the Software.
16 
17 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 DEALINGS IN THE SOFTWARE.  */
24 
25 #include <config.h>
26 
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <stddef.h>
30 #include <string.h>
31 #include <errno.h>
32 #include <limits.h>
33 
34 #include <pthread.h>
35 
36 #include "htslib/hfile.h"
37 #include "hfile_internal.h"
38 
39 #ifndef ENOTSUP
40 #define ENOTSUP EINVAL
41 #endif
42 #ifndef EOVERFLOW
43 #define EOVERFLOW ERANGE
44 #endif
45 #ifndef EPROTONOSUPPORT
46 #define EPROTONOSUPPORT ENOSYS
47 #endif
48 
49 #ifndef SSIZE_MAX /* SSIZE_MAX is POSIX 1 */
50 #define SSIZE_MAX LONG_MAX
51 #endif
52 
53 /* hFILE fields are used as follows:
54 
55    char *buffer;     // Pointer to the start of the I/O buffer
56    char *begin;      // First not-yet-read character / unused position
57    char *end;        // First unfilled/unfillable position
58    char *limit;      // Pointer to the first position past the buffer
59 
60    const hFILE_backend *backend;  // Methods to refill/flush I/O buffer
61 
62    off_t offset;     // Offset within the stream of buffer position 0
63    unsigned at_eof:1;// For reading, whether EOF has been seen
64    unsigned mobile:1;// Buffer is a mobile window or fixed full contents
65    unsigned readonly:1;// Whether opened as "r" rather than "r+"/"w"/"a"
66    int has_errno;    // Error number from the last failure on this stream
67 
68 For reading, begin is the first unread character in the buffer and end is the
69 first unfilled position:
70 
71    -----------ABCDEFGHIJKLMNO---------------
72    ^buffer    ^begin         ^end           ^limit
73 
74 For writing, begin is the first unused position and end is unused so remains
75 equal to buffer:
76 
77    ABCDEFGHIJKLMNOPQRSTUVWXYZ---------------
78    ^buffer                   ^begin         ^limit
79    ^end
80 
81 Thus if begin > end then there is a non-empty write buffer, if begin < end
82 then there is a non-empty read buffer, and if begin == end then both buffers
83 are empty.  In all cases, the stream's file position indicator corresponds
84 to the position pointed to by begin.
85 
86 The above is the normal scenario of a mobile window.  For in-memory
87 streams (eg via hfile_init_fixed) the buffer can be used as the full
88 contents without any separate backend behind it.  These always have at_eof
89 set, offset set to 0, need no read() method, and should just return EINVAL
90 for seek():
91 
92    abcdefghijkLMNOPQRSTUVWXYZ------
93    ^buffer    ^begin         ^end  ^limit
94 */
95 
hfile_init(size_t struct_size,const char * mode,size_t capacity)96 hFILE *hfile_init(size_t struct_size, const char *mode, size_t capacity)
97 {
98     hFILE *fp = (hFILE *) malloc(struct_size);
99     if (fp == NULL) goto error;
100 
101     if (capacity == 0) capacity = 32768;
102     // FIXME For now, clamp input buffer sizes so mpileup doesn't eat memory
103     if (strchr(mode, 'r') && capacity > 32768) capacity = 32768;
104 
105     fp->buffer = (char *) malloc(capacity);
106     if (fp->buffer == NULL) goto error;
107 
108     fp->begin = fp->end = fp->buffer;
109     fp->limit = &fp->buffer[capacity];
110 
111     fp->offset = 0;
112     fp->at_eof = 0;
113     fp->mobile = 1;
114     fp->readonly = (strchr(mode, 'r') && ! strchr(mode, '+'));
115     fp->has_errno = 0;
116     return fp;
117 
118 error:
119     hfile_destroy(fp);
120     return NULL;
121 }
122 
hfile_init_fixed(size_t struct_size,const char * mode,char * buffer,size_t buf_filled,size_t buf_size)123 hFILE *hfile_init_fixed(size_t struct_size, const char *mode,
124                         char *buffer, size_t buf_filled, size_t buf_size)
125 {
126     hFILE *fp = (hFILE *) malloc(struct_size);
127     if (fp == NULL) return NULL;
128 
129     fp->buffer = fp->begin = buffer;
130     fp->end = &fp->buffer[buf_filled];
131     fp->limit = &fp->buffer[buf_size];
132 
133     fp->offset = 0;
134     fp->at_eof = 1;
135     fp->mobile = 0;
136     fp->readonly = (strchr(mode, 'r') && ! strchr(mode, '+'));
137     fp->has_errno = 0;
138     return fp;
139 }
140 
// Forward declaration of the in-memory backend's method table, which is
// defined further down in this file (in the in-memory backend section).
static const struct hFILE_backend mem_backend;
142 
/* Frees an hFILE object and its I/O buffer without invoking any backend
   method.  fp may be NULL.  errno is left as it was on entry.  */
void hfile_destroy(hFILE *fp)
{
    const int saved_errno = errno;

    if (fp != NULL) {
        free(fp->buffer);
        free(fp);
    }

    errno = saved_errno;
}
150 
/* Returns non-zero when the stream holds buffered, not-yet-flushed write
   data, i.e. when begin lies beyond end.  */
static inline int writebuffer_is_nonempty(hFILE *fp)
{
    return fp->end < fp->begin;
}
155 
156 /* Refills the read buffer from the backend (once, so may only partially
157    fill the buffer), returning the number of additional characters read
158    (which might be 0), or negative when an error occurred.  */
refill_buffer(hFILE * fp)159 static ssize_t refill_buffer(hFILE *fp)
160 {
161     ssize_t n;
162 
163     // Move any unread characters to the start of the buffer
164     if (fp->mobile && fp->begin > fp->buffer) {
165         fp->offset += fp->begin - fp->buffer;
166         memmove(fp->buffer, fp->begin, fp->end - fp->begin);
167         fp->end = &fp->buffer[fp->end - fp->begin];
168         fp->begin = fp->buffer;
169     }
170 
171     // Read into the available buffer space at fp->[end,limit)
172     if (fp->at_eof || fp->end == fp->limit) n = 0;
173     else {
174         n = fp->backend->read(fp, fp->end, fp->limit - fp->end);
175         if (n < 0) { fp->has_errno = errno; return n; }
176         else if (n == 0) fp->at_eof = 1;
177     }
178 
179     fp->end += n;
180     return n;
181 }
182 
183 /*
184  * Changes the buffer size for an hFILE.  Ideally this is done
185  * immediately after opening.  If performed later, this function may
186  * fail if we are reducing the buffer size and the current offset into
187  * the buffer is beyond the new capacity.
188  *
189  * Returns 0 on success;
190  *        -1 on failure.
191  */
hfile_set_blksize(hFILE * fp,size_t bufsiz)192 int hfile_set_blksize(hFILE *fp, size_t bufsiz) {
193     char *buffer;
194     ptrdiff_t curr_used;
195     if (!fp) return -1;
196     curr_used = (fp->begin > fp->end ? fp->begin : fp->end) - fp->buffer;
197     if (bufsiz == 0) bufsiz = 32768;
198 
199     // Ensure buffer resize will not erase live data
200     if (bufsiz < curr_used)
201         return -1;
202 
203     if (!(buffer = (char *) realloc(fp->buffer, bufsiz))) return -1;
204 
205     fp->begin  = buffer + (fp->begin - fp->buffer);
206     fp->end    = buffer + (fp->end   - fp->buffer);
207     fp->buffer = buffer;
208     fp->limit  = &fp->buffer[bufsiz];
209 
210     return 0;
211 }
212 
213 /* Called only from hgetc(), when our buffer is empty.  */
hgetc2(hFILE * fp)214 int hgetc2(hFILE *fp)
215 {
216     return (refill_buffer(fp) > 0)? (unsigned char) *(fp->begin++) : EOF;
217 }
218 
/* Reads characters from fp into buffer up to and including the first
   occurrence of delim, stopping early when size-1 characters have been
   stored or EOF is reached.  The result is always NUL-terminated.

   Returns the number of characters stored (excluding the terminator; 0 at
   EOF), or -1 on error: EINVAL if size is 0 or exceeds SSIZE_MAX, EBADF if
   the stream has pending write data, or whatever a failed refill left in
   fp->has_errno.  */
ssize_t hgetdelim(char *buffer, size_t size, int delim, hFILE *fp)
{
    size_t copied = 0;
    ssize_t got;

    if (size < 1 || size > SSIZE_MAX) {
        fp->has_errno = errno = EINVAL;
        return -1;
    }
    if (writebuffer_is_nonempty(fp)) {
        fp->has_errno = errno = EBADF;
        return -1;
    }

    /* Reserve one byte of the output for the NUL terminator */
    const size_t room = size - 1;

    for (;;) {
        /* Consider only as much buffered data as still fits in the output */
        size_t chunk = fp->end - fp->begin;
        if (chunk > room - copied) chunk = room - copied;

        /* If the delimiter is in there, take everything up to and including it */
        char *hit = memchr(fp->begin, delim, chunk);
        if (hit != NULL) chunk = hit - fp->begin + 1;

        memcpy(buffer + copied, fp->begin, chunk);
        fp->begin += chunk;
        copied += chunk;

        /* Finished on finding the delimiter or filling the output buffer */
        if (hit != NULL || copied == room) {
            buffer[copied] = '\0';
            return copied;
        }

        got = refill_buffer(fp);
        if (got <= 0) break;
    }

    if (got < 0) return -1; /* Error on refill. */

    buffer[copied] = '\0';  /* EOF, return anything that was copied. */
    return copied;
}
268 
/* Reads a line via hgetln() into buffer, which holds size bytes.
   Returns buffer when at least one character was read; returns NULL at
   EOF, on error, or (with errno and fp->has_errno set to EINVAL) when
   size is less than 1.  */
char *hgets(char *buffer, int size, hFILE *fp)
{
    if (size >= 1) {
        if (hgetln(buffer, size, fp) > 0) return buffer;
        return NULL;
    }

    fp->has_errno = errno = EINVAL;
    return NULL;
}
277 
/* Copies up to nbytes of upcoming data into buffer without consuming it,
   refilling the read buffer as needed until enough is available or a
   refill yields nothing more.

   Returns the number of bytes copied (possibly fewer than nbytes), or a
   negative value if a refill failed.  */
ssize_t hpeek(hFILE *fp, void *buffer, size_t nbytes)
{
    size_t avail = fp->end - fp->begin;

    while (avail < nbytes) {
        ssize_t got = refill_buffer(fp);
        if (got < 0) return got;
        if (got == 0) break;
        avail += got;
    }

    const size_t ncopy = (avail > nbytes) ? nbytes : avail;
    memcpy(buffer, fp->begin, ncopy);
    return ncopy;
}
292 
293 /* Called only from hread(); when called, our buffer is empty and nread bytes
294    have already been placed in the destination buffer.  */
hread2(hFILE * fp,void * destv,size_t nbytes,size_t nread)295 ssize_t hread2(hFILE *fp, void *destv, size_t nbytes, size_t nread)
296 {
297     const size_t capacity = fp->limit - fp->buffer;
298     int buffer_invalidated = 0;
299     char *dest = (char *) destv;
300     dest += nread, nbytes -= nread;
301 
302     // Read large requests directly into the destination buffer
303     while (nbytes * 2 >= capacity && !fp->at_eof) {
304         ssize_t n = fp->backend->read(fp, dest, nbytes);
305         if (n < 0) { fp->has_errno = errno; return n; }
306         else if (n == 0) fp->at_eof = 1;
307         else buffer_invalidated = 1;
308         fp->offset += n;
309         dest += n, nbytes -= n;
310         nread += n;
311     }
312 
313     if (buffer_invalidated) {
314         // Our unread buffer is empty, so begin == end, but our already-read
315         // buffer [buffer,begin) is likely non-empty and is no longer valid as
316         // its contents are no longer adjacent to the file position indicator.
317         // Discard it so that hseek() can't try to take advantage of it.
318         fp->offset += fp->begin - fp->buffer;
319         fp->begin = fp->end = fp->buffer;
320     }
321 
322     while (nbytes > 0 && !fp->at_eof) {
323         size_t n;
324         ssize_t ret = refill_buffer(fp);
325         if (ret < 0) return ret;
326 
327         n = fp->end - fp->begin;
328         if (n > nbytes) n = nbytes;
329         memcpy(dest, fp->begin, n);
330         fp->begin += n;
331         dest += n, nbytes -= n;
332         nread += n;
333     }
334 
335     return nread;
336 }
337 
338 /* Flushes the write buffer, fp->[buffer,begin), out through the backend
339    returning 0 on success or negative if an error occurred.  */
flush_buffer(hFILE * fp)340 static ssize_t flush_buffer(hFILE *fp)
341 {
342     const char *buffer = fp->buffer;
343     while (buffer < fp->begin) {
344         ssize_t n = fp->backend->write(fp, buffer, fp->begin - buffer);
345         if (n < 0) { fp->has_errno = errno; return n; }
346         buffer += n;
347         fp->offset += n;
348     }
349 
350     fp->begin = fp->buffer;  // Leave the buffer empty
351     return 0;
352 }
353 
/* Flushes buffered write data to the backend and then, if the backend
   provides a flush method, invokes that as well.
   Returns 0 on success or EOF on failure (fp->has_errno records errno).  */
int hflush(hFILE *fp)
{
    if (flush_buffer(fp) < 0) return EOF;

    if (fp->backend->flush != NULL && fp->backend->flush(fp) < 0) {
        fp->has_errno = errno;
        return EOF;
    }

    return 0;
}
362 
363 /* Called only from hputc(), when our buffer is already full.  */
hputc2(int c,hFILE * fp)364 int hputc2(int c, hFILE *fp)
365 {
366     if (flush_buffer(fp) < 0) return EOF;
367     *(fp->begin++) = c;
368     return c;
369 }
370 
371 /* Called only from hwrite() and hputs2(); when called, our buffer is full and
372    ncopied bytes from the source have already been copied to our buffer.  */
hwrite2(hFILE * fp,const void * srcv,size_t totalbytes,size_t ncopied)373 ssize_t hwrite2(hFILE *fp, const void *srcv, size_t totalbytes, size_t ncopied)
374 {
375     const char *src = (const char *) srcv;
376     ssize_t ret;
377     const size_t capacity = fp->limit - fp->buffer;
378     size_t remaining = totalbytes - ncopied;
379     src += ncopied;
380 
381     ret = flush_buffer(fp);
382     if (ret < 0) return ret;
383 
384     // Write large blocks out directly from the source buffer
385     while (remaining * 2 >= capacity) {
386         ssize_t n = fp->backend->write(fp, src, remaining);
387         if (n < 0) { fp->has_errno = errno; return n; }
388         fp->offset += n;
389         src += n, remaining -= n;
390     }
391 
392     // Just buffer any remaining characters
393     memcpy(fp->begin, src, remaining);
394     fp->begin += remaining;
395 
396     return totalbytes;
397 }
398 
399 /* Called only from hputs(), when our buffer is already full.  */
hputs2(const char * text,size_t totalbytes,size_t ncopied,hFILE * fp)400 int hputs2(const char *text, size_t totalbytes, size_t ncopied, hFILE *fp)
401 {
402     return (hwrite2(fp, text, totalbytes, ncopied) >= 0)? 0 : EOF;
403 }
404 
/* Repositions the stream according to offset and whence (SEEK_SET,
   SEEK_CUR or SEEK_END).

   Pending write data on a mobile stream is flushed first.  SEEK_CUR
   requests, and SEEK_END requests on immobile buffers, are converted into
   absolute SEEK_SET positions.  If the target then lies within the data
   currently held in the buffer (and the stream is immobile or read-only),
   only fp->begin is moved; otherwise the backend's seek method is called
   and the buffer is discarded.

   Returns the new stream position, or a negative value on failure with
   errno and fp->has_errno set.  */
off_t hseek(hFILE *fp, off_t offset, int whence)
{
    off_t curpos, pos;

    // Get buffered output written out so that the backend's position
    // matches the stream position before anything is computed from it.
    if (writebuffer_is_nonempty(fp) && fp->mobile) {
        int ret = flush_buffer(fp);
        if (ret < 0) return ret;
    }

    curpos = htell(fp);

    // Relative offsets are given relative to the hFILE's stream position,
    // which may differ from the backend's physical position due to buffering
    // read-ahead.  Correct for this by converting to an absolute position.
    if (whence == SEEK_CUR) {
        if (curpos + offset < 0) {
            // Either a negative offset resulted in a position before the
            // start of the file, or we overflowed when given a positive offset
            fp->has_errno = errno = (offset < 0)? EINVAL : EOVERFLOW;
            return -1;
        }

        whence = SEEK_SET;
        offset = curpos + offset;
    }
    // For fixed immobile buffers, convert everything else to SEEK_SET too
    // so that seeking can be avoided for all (within range) requests.
    else if (! fp->mobile && whence == SEEK_END) {
        // For an immobile buffer the data occupies [buffer,end)
        size_t length = fp->end - fp->buffer;
        // Only positions from the start of the data up to its end are valid
        if (offset > 0 || -offset > length) {
            fp->has_errno = errno = EINVAL;
            return -1;
        }

        whence = SEEK_SET;
        offset = length + offset;
    }

    // Avoid seeking if the desired position is within our read buffer.
    // (But not when the next operation may be a write on a mobile buffer.)
    if (whence == SEEK_SET && (! fp->mobile || fp->readonly) &&
        offset >= fp->offset && offset - fp->offset <= fp->end - fp->buffer) {
        // fp->offset is the stream offset of buffer[0], so the difference
        // is the index of the target position within the buffer.
        fp->begin = &fp->buffer[offset - fp->offset];
        return offset;
    }

    pos = fp->backend->seek(fp, offset, whence);
    if (pos < 0) { fp->has_errno = errno; return pos; }

    // Seeking succeeded, so discard any non-empty read buffer
    fp->begin = fp->end = fp->buffer;
    fp->at_eof = 0;

    fp->offset = pos;
    return pos;
}
461 
/* Flushes any pending write data, closes the backend and frees fp.  fp is
   always destroyed, whether or not an error is reported.

   Returns 0 on success.  Returns EOF, with errno set, if the stream had
   already recorded an error or if flushing or closing failed; the most
   recent failure determines errno.  */
int hclose(hFILE *fp)
{
    int err = fp->has_errno;

    if (writebuffer_is_nonempty(fp)) {
        if (hflush(fp) < 0) err = fp->has_errno;
    }
    if (fp->backend->close(fp) < 0) err = errno;
    hfile_destroy(fp);

    if (err == 0) return 0;

    errno = err;
    return EOF;
}
476 
/* Closes the backend and frees fp without flushing buffered output.
   Any failure of the close is ignored and errno is left as it was on
   entry.  */
void hclose_abruptly(hFILE *fp)
{
    const int saved_errno = errno;

    /* The result is deliberately ignored: there is nobody to report it to */
    (void) fp->backend->close(fp);
    hfile_destroy(fp);

    errno = saved_errno;
}
484 
485 
486 /***************************
487  * File descriptor backend *
488  ***************************/
489 
490 #ifndef _WIN32
491 #include <sys/socket.h>
492 #include <sys/stat.h>
493 #define HAVE_STRUCT_STAT_ST_BLKSIZE
494 #else
495 #include <winsock2.h>
496 #define HAVE_CLOSESOCKET
497 #define HAVE_SETMODE
498 #endif
499 #include <fcntl.h>
500 #include <unistd.h>
501 
502 /* For Unix, it doesn't matter whether a file descriptor is a socket.
503    However Windows insists on send()/recv() and its own closesocket()
504    being used when fd happens to be a socket.  */
505 
/* hFILE subtype used by the file descriptor backend.  */
typedef struct {
    hFILE base;            // Common hFILE state; first member, as the backend
                           // methods cast between hFILE * and hFILE_fd *
    int fd;                // Descriptor the backend methods operate on
    unsigned is_socket:1;  // Selects recv()/send() rather than read()/write()
                           // (and closesocket() where HAVE_CLOSESOCKET)
} hFILE_fd;
511 
/* Backend read method: reads up to nbytes from the descriptor into buffer
   using recv() for sockets and read() otherwise, retrying whenever the
   call is interrupted (EINTR).
   Returns the underlying call's result: bytes read, 0, or negative.  */
static ssize_t fd_read(hFILE *fpv, void *buffer, size_t nbytes)
{
    hFILE_fd *fp = (hFILE_fd *) fpv;
    ssize_t got;

    for (;;) {
        if (fp->is_socket) got = recv(fp->fd, buffer, nbytes, 0);
        else got = read(fp->fd, buffer, nbytes);

        if (got >= 0 || errno != EINTR) break;
    }

    return got;
}
522 
/* Backend write method: writes up to nbytes from buffer to the descriptor
   using send() for sockets and write() otherwise, retrying whenever the
   call is interrupted (EINTR).
   Returns the underlying call's result: bytes written, or negative.  */
static ssize_t fd_write(hFILE *fpv, const void *buffer, size_t nbytes)
{
    hFILE_fd *fp = (hFILE_fd *) fpv;
    ssize_t written;

    for (;;) {
        if (fp->is_socket) written = send(fp->fd, buffer, nbytes, 0);
        else written = write(fp->fd, buffer, nbytes);

        if (written >= 0 || errno != EINTR) break;
    }

#ifdef _WIN32
    // Windows has no SIGPIPE; writing to a broken pipe fails with EINVAL
    // instead.  Detect that case (EINVAL, ERROR_NO_DATA, and the fd being
    // a pipe) and raise SIGTERM in place of SIGPIPE.  Not ideal, but the
    // alternative is extra checking in every single piece of calling code.
    if (written < 0 && errno == EINVAL &&
        GetLastError() == ERROR_NO_DATA &&
        GetFileType((HANDLE)_get_osfhandle(fp->fd)) == FILE_TYPE_PIPE) {
        raise(SIGTERM);
    }
#endif

    return written;
}
545 
/* Backend seek method: forwards the request to lseek() on the descriptor
   and returns its result unchanged.  */
static off_t fd_seek(hFILE *fpv, off_t offset, int whence)
{
    const hFILE_fd *fp = (const hFILE_fd *) fpv;
    const int fd = fp->fd;

    return lseek(fd, offset, whence);
}
551 
/* Backend flush method: asks the system to commit the descriptor's data
   using fdatasync() or fsync(), whichever the build configuration provides
   (neither being available makes this a no-op), retrying on EINTR.
   EINVAL and ENOTSUP failures are treated as success.
   Returns 0 on success or a negative value on failure.  */
static int fd_flush(hFILE *fpv)
{
#if defined(HAVE_FDATASYNC) || defined(HAVE_FSYNC)
    hFILE_fd *fp = (hFILE_fd *) fpv;
#endif
    int status = 0;

    do {
#ifdef HAVE_FDATASYNC
        status = fdatasync(fp->fd);
#elif defined(HAVE_FSYNC)
        status = fsync(fp->fd);
#endif
        // Ignore invalid-for-fsync(2) errors due to being, e.g., a pipe,
        // and operation-not-supported errors (Mac OS X)
        if (status < 0) {
            if (errno == EINVAL || errno == ENOTSUP) status = 0;
        }
    } while (status < 0 && errno == EINTR);

    return status;
}
569 
/* Backend close method: closes the descriptor, using closesocket() for
   sockets where HAVE_CLOSESOCKET is defined and close() otherwise, and
   retrying whenever the call is interrupted (EINTR).
   Returns the result of the final close call.  */
static int fd_close(hFILE *fpv)
{
    hFILE_fd *fp = (hFILE_fd *) fpv;
    int status;

    for (;;) {
#ifdef HAVE_CLOSESOCKET
        if (fp->is_socket) status = closesocket(fp->fd);
        else status = close(fp->fd);
#else
        status = close(fp->fd);
#endif
        if (status >= 0 || errno != EINTR) break;
    }

    return status;
}
583 
584 static const struct hFILE_backend fd_backend =
585 {
586     fd_read, fd_write, fd_seek, fd_flush, fd_close
587 };
588 
/* Returns the st_blksize that fstat() reports for fd, or 0 when fstat()
   fails or struct stat has no st_blksize member on this platform.  */
static size_t blksize(int fd)
{
    size_t preferred = 0;

#ifdef HAVE_STRUCT_STAT_ST_BLKSIZE
    struct stat info;
    if (fstat(fd, &info) == 0) preferred = info.st_blksize;
#endif

    return preferred;
}
599 
hopen_fd(const char * filename,const char * mode)600 static hFILE *hopen_fd(const char *filename, const char *mode)
601 {
602     hFILE_fd *fp = NULL;
603     int fd = open(filename, hfile_oflags(mode), 0666);
604     if (fd < 0) goto error;
605 
606     fp = (hFILE_fd *) hfile_init(sizeof (hFILE_fd), mode, blksize(fd));
607     if (fp == NULL) goto error;
608 
609     fp->fd = fd;
610     fp->is_socket = 0;
611     fp->base.backend = &fd_backend;
612     return &fp->base;
613 
614 error:
615     if (fd >= 0) { int save = errno; (void) close(fd); errno = save; }
616     hfile_destroy((hFILE *) fp);
617     return NULL;
618 }
619 
620 // Loads the contents of filename to produced a read-only, in memory,
621 // immobile hfile.  fp is the already opened file.  We always close this
622 // input fp, irrespective of whether we error or whether we return a new
623 // immobile hfile.
hpreload(hFILE * fp)624 static hFILE *hpreload(hFILE *fp) {
625     hFILE *mem_fp;
626     char *buf = NULL;
627     off_t buf_sz = 0, buf_a = 0, buf_inc = 8192, len;
628 
629     for (;;) {
630         if (buf_a - buf_sz < 5000) {
631             buf_a += buf_inc;
632             char *t = realloc(buf, buf_a);
633             if (!t) goto err;
634             buf = t;
635             if (buf_inc < 1000000) buf_inc *= 1.3;
636         }
637         len = hread(fp, buf+buf_sz, buf_a-buf_sz);
638         if (len > 0)
639             buf_sz += len;
640         else
641             break;
642     }
643 
644     if (len < 0) goto err;
645     mem_fp = hfile_init_fixed(sizeof(hFILE), "r", buf, buf_sz, buf_a);
646     if (!mem_fp) goto err;
647     mem_fp->backend = &mem_backend;
648 
649     if (hclose(fp) < 0) {
650         hclose_abruptly(mem_fp);
651         goto err;
652     }
653     return mem_fp;
654 
655  err:
656     free(buf);
657     hclose_abruptly(fp);
658     return NULL;
659 }
660 
/* isremote method for the "preload:" scheme: reports whatever hisremote()
   says about the URL that follows the scheme prefix.  */
static int is_preload_url_remote(const char *url){
    const char *target = url + 8; // len("preload:") = 8
    return hisremote(target);
}
664 
hopen_preload(const char * url,const char * mode)665 static hFILE *hopen_preload(const char *url, const char *mode){
666     hFILE* fp = hopen(url + 8, mode);
667     return hpreload(fp);
668 }
669 
hdopen(int fd,const char * mode)670 hFILE *hdopen(int fd, const char *mode)
671 {
672     hFILE_fd *fp = (hFILE_fd*) hfile_init(sizeof (hFILE_fd), mode, blksize(fd));
673     if (fp == NULL) return NULL;
674 
675     fp->fd = fd;
676     fp->is_socket = (strchr(mode, 's') != NULL);
677     fp->base.backend = &fd_backend;
678     return &fp->base;
679 }
680 
hopen_fd_fileuri(const char * url,const char * mode)681 static hFILE *hopen_fd_fileuri(const char *url, const char *mode)
682 {
683     if (strncmp(url, "file://localhost/", 17) == 0) url += 16;
684     else if (strncmp(url, "file:///", 8) == 0) url += 7;
685     else { errno = EPROTONOSUPPORT; return NULL; }
686 
687 #ifdef _WIN32
688     // For cases like C:/foo
689     if (url[0] == '/' && url[2] == ':' && url[3] == '/') url++;
690 #endif
691 
692     return hopen_fd(url, mode);
693 }
694 
hopen_fd_stdinout(const char * mode)695 static hFILE *hopen_fd_stdinout(const char *mode)
696 {
697     int fd = (strchr(mode, 'r') != NULL)? STDIN_FILENO : STDOUT_FILENO;
698 #if defined HAVE_SETMODE && defined O_BINARY
699     if (setmode(fd, O_BINARY) < 0) return NULL;
700 #endif
701     return hdopen(fd, mode);
702 }
703 
/* Translates an fopen-style mode string into open(2) flags.

   Characters are processed left to right: 'r', 'w', 'a' and '+' each set
   the access mode (the last one seen wins), 'w' and 'a' additionally add
   O_CREAT together with O_TRUNC or O_APPEND respectively, and 'e' and 'x'
   add O_CLOEXEC and O_EXCL where the platform defines them.  Any other
   character is ignored.  O_BINARY is always added where it exists.  */
int hfile_oflags(const char *mode)
{
    int access = 0, extra = 0;
    const char *p = mode;

    while (*p != '\0') {
        const char c = *p++;

        if (c == 'r') {
            access = O_RDONLY;
        }
        else if (c == 'w') {
            access = O_WRONLY;
            extra |= O_CREAT | O_TRUNC;
        }
        else if (c == 'a') {
            access = O_WRONLY;
            extra |= O_CREAT | O_APPEND;
        }
        else if (c == '+') {
            access = O_RDWR;
        }
#ifdef O_CLOEXEC
        else if (c == 'e') {
            extra |= O_CLOEXEC;
        }
#endif
#ifdef O_EXCL
        else if (c == 'x') {
            extra |= O_EXCL;
        }
#endif
    }

#ifdef O_BINARY
    extra |= O_BINARY;
#endif

    return access | extra;
}
729 
730 
731 /*********************
732  * In-memory backend *
733  *********************/
734 
735 #include "hts_internal.h"
736 
/* hFILE subtype used by the in-memory backend; it adds no fields of its
   own to the common hFILE state.  */
typedef struct {
    hFILE base;  // Common hFILE state
} hFILE_mem;
740 
/* Backend seek method for in-memory streams: always fails with EINVAL,
   whatever the arguments.  */
static off_t mem_seek(hFILE *fpv, off_t offset, int whence)
{
    (void) fpv;
    (void) offset;
    (void) whence;

    errno = EINVAL;
    return (off_t) -1;
}
746 
/* Backend close method for in-memory streams: does nothing and reports
   success.  */
static int mem_close(hFILE *fpv)
{
    (void) fpv;
    return 0;
}
751 
752 static const struct hFILE_backend mem_backend =
753 {
754     NULL, NULL, mem_seek, NULL, mem_close
755 };
756 
/* Compares s against key character by character, lower-casing each
   character of s with tolower_c() before comparing it.
   Returns 0 if s begins with key, or +1 at the first mismatch.  */
static int cmp_prefix(const char *key, const char *s)
{
    for (; *key != '\0'; key++, s++) {
        if (tolower_c(*s) != *key) return +1;
    }

    return 0;
}
765 
create_hfile_mem(char * buffer,const char * mode,size_t buf_filled,size_t buf_size)766 static hFILE *create_hfile_mem(char* buffer, const char* mode, size_t buf_filled, size_t buf_size)
767 {
768     hFILE_mem *fp = (hFILE_mem *) hfile_init_fixed(sizeof(hFILE_mem), mode, buffer, buf_filled, buf_size);
769     if (fp == NULL)
770         return NULL;
771 
772     fp->base.backend = &mem_backend;
773     return &fp->base;
774 }
775 
hopen_mem(const char * url,const char * mode)776 static hFILE *hopen_mem(const char *url, const char *mode)
777 {
778     size_t length, size;
779     char *buffer;
780     const char *data, *comma = strchr(url, ',');
781     if (comma == NULL) { errno = EINVAL; return NULL; }
782     data = comma+1;
783 
784     // TODO Implement write modes
785     if (strchr(mode, 'r') == NULL) { errno = EROFS; return NULL; }
786 
787     if (comma - url >= 7 && cmp_prefix(";base64", &comma[-7]) == 0) {
788         size = hts_base64_decoded_length(strlen(data));
789         buffer = malloc(size);
790         if (buffer == NULL) return NULL;
791         hts_decode_base64(buffer, &length, data);
792     }
793     else {
794         size = strlen(data) + 1;
795         buffer = malloc(size);
796         if (buffer == NULL) return NULL;
797         hts_decode_percent(buffer, &length, data);
798     }
799     hFILE* hf;
800 
801     if(!(hf = create_hfile_mem(buffer, mode, length, size))){
802         free(buffer);
803         return NULL;
804     }
805 
806     return hf;
807 }
808 
hopenv_mem(const char * filename,const char * mode,va_list args)809 hFILE *hopenv_mem(const char *filename, const char *mode, va_list args)
810 {
811     char* buffer = va_arg(args, char*);
812     size_t sz = va_arg(args, size_t);
813     va_end(args);
814 
815     hFILE* hf;
816 
817     if(!(hf = create_hfile_mem(buffer, mode, sz, sz))){
818         free(buffer);
819         return NULL;
820     }
821 
822     return hf;
823 }
824 
/* Returns the buffer underlying an in-memory hFILE without transferring
   ownership, and, if length is non-NULL, stores the size of that buffer
   (the distance from file->buffer to file->limit) in *length.
   Returns NULL with errno set to EINVAL if file does not use the in-memory
   backend; *length is not written in that case.  */
char *hfile_mem_get_buffer(hFILE *file, size_t *length) {
    if (file->backend != &mem_backend) {
        errno = EINVAL;
        return NULL;
    }

    // limit points one past the end of the buffer, so the size is
    // limit - buffer; the reverse would be negative and wrap around
    // when stored in a size_t.
    if (length)
        *length = file->limit - file->buffer;

    return file->buffer;
}
836 
/* Like hfile_mem_get_buffer(), but on success also detaches the buffer
   from file by setting file->buffer to NULL, so that destroying file will
   no longer free it.
   Returns the buffer, or NULL if hfile_mem_get_buffer() failed.  */
char *hfile_mem_steal_buffer(hFILE *file, size_t *length) {
    char *buf = hfile_mem_get_buffer(file, length);
    if (buf == NULL) return NULL;

    file->buffer = NULL;
    return buf;
}
843 
hfile_plugin_init_mem(struct hFILE_plugin * self)844 int hfile_plugin_init_mem(struct hFILE_plugin *self)
845 {
846     // mem files are declared remote so they work with a tabix index
847     static const struct hFILE_scheme_handler handler =
848             {NULL, hfile_always_remote, "mem", 2000 + 50, hopenv_mem};
849     self->name = "mem";
850     hfile_add_scheme_handler("mem", &handler);
851     return 0;
852 }
853 
854 
855 /*****************************************
856  * Plugin and hopen() backend dispatcher *
857  *****************************************/
858 
859 #include "htslib/khash.h"
860 
// Instantiates the "scheme_string" hash map type: string keys mapped to
// scheme handler pointers.
KHASH_MAP_INIT_STR(scheme_string, const struct hFILE_scheme_handler *)
// Registry of scheme handlers keyed by scheme name; created by
// load_hfile_plugins() and destroyed by hfile_exit().
static khash_t(scheme_string) *schemes = NULL;
863 
/* Node in the singly linked list of initialised plugins.  */
struct hFILE_plugin_list {
    struct hFILE_plugin plugin;      // The plugin's own record
    struct hFILE_plugin_list *next;  // Next node in the list, or NULL
};
868 
// Head of the list of successfully initialised plugins; init_add_plugin()
// pushes new entries at the front and hfile_exit() tears the list down.
static struct hFILE_plugin_list *plugins = NULL;
// Mutex held by hfile_exit() while the scheme table and plugin list are
// being destroyed.
static pthread_mutex_t plugins_lock = PTHREAD_MUTEX_INITIALIZER;
871 
hfile_exit()872 static void hfile_exit()
873 {
874     pthread_mutex_lock(&plugins_lock);
875 
876     kh_destroy(scheme_string, schemes);
877 
878     while (plugins != NULL) {
879         struct hFILE_plugin_list *p = plugins;
880         if (p->plugin.destroy) p->plugin.destroy();
881 #ifdef ENABLE_PLUGINS
882         if (p->plugin.obj) close_plugin(p->plugin.obj);
883 #endif
884         plugins = p->next;
885         free(p);
886     }
887 
888     pthread_mutex_unlock(&plugins_lock);
889     pthread_mutex_destroy(&plugins_lock);
890 }
891 
priority(const struct hFILE_scheme_handler * handler)892 static inline int priority(const struct hFILE_scheme_handler *handler)
893 {
894     return handler->priority % 1000;
895 }
896 
hfile_add_scheme_handler(const char * scheme,const struct hFILE_scheme_handler * handler)897 void hfile_add_scheme_handler(const char *scheme,
898                               const struct hFILE_scheme_handler *handler)
899 {
900     int absent;
901     khint_t k = kh_put(scheme_string, schemes, scheme, &absent);
902     if (absent || priority(handler) > priority(kh_value(schemes, k))) {
903         kh_value(schemes, k) = handler;
904     }
905 }
906 
init_add_plugin(void * obj,int (* init)(struct hFILE_plugin *),const char * pluginname)907 static int init_add_plugin(void *obj, int (*init)(struct hFILE_plugin *),
908                            const char *pluginname)
909 {
910     struct hFILE_plugin_list *p = malloc (sizeof (struct hFILE_plugin_list));
911     if (p == NULL) abort();
912 
913     p->plugin.api_version = 1;
914     p->plugin.obj = obj;
915     p->plugin.name = NULL;
916     p->plugin.destroy = NULL;
917 
918     int ret = (*init)(&p->plugin);
919 
920     if (ret != 0) {
921         hts_log_debug("Initialisation failed for plugin \"%s\": %d", pluginname, ret);
922         free(p);
923         return ret;
924     }
925 
926     hts_log_debug("Loaded \"%s\"", pluginname);
927 
928     p->next = plugins, plugins = p;
929     return 0;
930 }
931 
load_hfile_plugins()932 static void load_hfile_plugins()
933 {
934     static const struct hFILE_scheme_handler
935         data = { hopen_mem, hfile_always_local, "built-in", 80 },
936         file = { hopen_fd_fileuri, hfile_always_local, "built-in", 80 },
937         preload = { hopen_preload, is_preload_url_remote, "built-in", 80 };
938 
939     schemes = kh_init(scheme_string);
940     if (schemes == NULL) abort();
941 
942     hfile_add_scheme_handler("data", &data);
943     hfile_add_scheme_handler("file", &file);
944     hfile_add_scheme_handler("preload", &preload);
945     init_add_plugin(NULL, hfile_plugin_init_net, "knetfile");
946     init_add_plugin(NULL, hfile_plugin_init_mem, "mem");
947 
948 #ifdef ENABLE_PLUGINS
949     struct hts_path_itr path;
950     const char *pluginname;
951     hts_path_itr_setup(&path, NULL, NULL, "hfile_", 6, NULL, 0);
952     while ((pluginname = hts_path_itr_next(&path)) != NULL) {
953         void *obj;
954         int (*init)(struct hFILE_plugin *) = (int (*)(struct hFILE_plugin *))
955             load_plugin(&obj, pluginname, "hfile_plugin_init");
956 
957         if (init) {
958             if (init_add_plugin(obj, init, pluginname) != 0)
959                 close_plugin(obj);
960         }
961     }
962 #else
963 
964 #ifdef HAVE_LIBCURL
965     init_add_plugin(NULL, hfile_plugin_init_libcurl, "libcurl");
966 #endif
967 #ifdef ENABLE_GCS
968     init_add_plugin(NULL, hfile_plugin_init_gcs, "gcs");
969 #endif
970 #ifdef ENABLE_S3
971     init_add_plugin(NULL, hfile_plugin_init_s3, "s3");
972 #endif
973 
974 #endif
975 
976     // In the unlikely event atexit() fails, it's better to succeed here and
977     // carry on; then eventually when the program exits, we'll merely close
978     // down the plugins uncleanly, as if we had aborted.
979     (void) atexit(hfile_exit);
980 }
981 
982 /* A filename like "foo:bar" in which we don't recognise the scheme is
983    either an ordinary file or an indication of a missing or broken plugin.
984    Try to open it as an ordinary file; but if there's no such file, set
985    errno distinctively to make the plugin issue apparent.  */
hopen_unknown_scheme(const char * fname,const char * mode)986 static hFILE *hopen_unknown_scheme(const char *fname, const char *mode)
987 {
988     hFILE *fp = hopen_fd(fname, mode);
989     if (fp == NULL && errno == ENOENT) errno = EPROTONOSUPPORT;
990     return fp;
991 }
992 
993 /* Returns the appropriate handler, or NULL if the string isn't an URL.  */
find_scheme_handler(const char * s)994 static const struct hFILE_scheme_handler *find_scheme_handler(const char *s)
995 {
996     static const struct hFILE_scheme_handler unknown_scheme =
997         { hopen_unknown_scheme, hfile_always_local, "built-in", 0 };
998 
999     char scheme[12];
1000     int i;
1001 
1002     for (i = 0; i < sizeof scheme; i++)
1003         if (isalnum_c(s[i]) || s[i] == '+' || s[i] == '-' || s[i] == '.')
1004             scheme[i] = tolower_c(s[i]);
1005         else if (s[i] == ':') break;
1006         else return NULL;
1007 
1008     // 1 byte schemes are likely windows C:/foo pathnames
1009     if (i <= 1 || i >= sizeof scheme) return NULL;
1010     scheme[i] = '\0';
1011 
1012     pthread_mutex_lock(&plugins_lock);
1013     if (! schemes) load_hfile_plugins();
1014     pthread_mutex_unlock(&plugins_lock);
1015 
1016     khint_t k = kh_get(scheme_string, schemes, scheme);
1017     return (k != kh_end(schemes))? kh_value(schemes, k) : &unknown_scheme;
1018 }
1019 
hopen(const char * fname,const char * mode,...)1020 hFILE *hopen(const char *fname, const char *mode, ...)
1021 {
1022     const struct hFILE_scheme_handler *handler = find_scheme_handler(fname);
1023     if (handler) {
1024         if (strchr(mode, ':') == NULL
1025             || handler->priority < 2000
1026             || handler->vopen == NULL) {
1027             return handler->open(fname, mode);
1028         }
1029         else {
1030             hFILE *fp;
1031             va_list arg;
1032             va_start(arg, mode);
1033             fp = handler->vopen(fname, mode, arg);
1034             va_end(arg);
1035             return fp;
1036         }
1037     }
1038     else if (strcmp(fname, "-") == 0) return hopen_fd_stdinout(mode);
1039     else return hopen_fd(fname, mode);
1040 }
1041 
/* isremote-style predicate that reports every name as local (returns 0).  */
int hfile_always_local(const char *fname)
{
    (void) fname;  // the answer does not depend on the name
    return 0;
}
/* isremote-style predicate that reports every name as remote (returns 1).  */
int hfile_always_remote(const char *fname)
{
    (void) fname;  // the answer does not depend on the name
    return 1;
}
1044 
hisremote(const char * fname)1045 int hisremote(const char *fname)
1046 {
1047     const struct hFILE_scheme_handler *handler = find_scheme_handler(fname);
1048     return handler? handler->isremote(fname) : 0;
1049 }
1050