1 /*  hfile.c -- buffered low-level input/output streams.
2 
3     Copyright (C) 2013-2015 Genome Research Ltd.
4 
5     Author: John Marshall <jm18@sanger.ac.uk>
6 
7 Permission is hereby granted, free of charge, to any person obtaining a copy
8 of this software and associated documentation files (the "Software"), to deal
9 in the Software without restriction, including without limitation the rights
10 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 copies of the Software, and to permit persons to whom the Software is
12 furnished to do so, subject to the following conditions:
13 
14 The above copyright notice and this permission notice shall be included in
15 all copies or substantial portions of the Software.
16 
17 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 DEALINGS IN THE SOFTWARE.  */
24 
25 #include "plink_common.h"
26 #include <errno.h>
27 
28 #include "hfile.h"
29 #include "hfile_internal.h"
30 
31 /* hFILE fields are used as follows:
32 
33    char *buffer;     // Pointer to the start of the I/O buffer
34    char *begin;      // First not-yet-read character / unused position
35    char *end;        // First unfilled/unfillable position
36    char *limit;      // Pointer to the first position past the buffer
37 
38    const hFILE_backend *backend;  // Methods to refill/flush I/O buffer
39 
40    off_t offset;     // Offset within the stream of buffer position 0
41    int at_eof:1;     // For reading, whether EOF has been seen
42    int has_errno;    // Error number from the last failure on this stream
43 
44 For reading, begin is the first unread character in the buffer and end is the
45 first unfilled position:
46 
47    -----------ABCDEFGHIJKLMNO---------------
48    ^buffer    ^begin         ^end           ^limit
49 
50 For writing, begin is the first unused position and end is unused so remains
51 equal to buffer:
52 
53    ABCDEFGHIJKLMNOPQRSTUVWXYZ---------------
54    ^buffer                   ^begin         ^limit
55    ^end
56 
57 Thus if begin > end then there is a non-empty write buffer, if begin < end
58 then there is a non-empty read buffer, and if begin == end then both buffers
59 are empty.  In all cases, the stream's file position indicator corresponds
60 to the position pointed to by begin.  */
61 
hfile_init(size_t struct_size,const char * mode,size_t capacity)62 hFILE *hfile_init(size_t struct_size, const char *mode, size_t capacity)
63 {
64     hFILE *fp = (hFILE *) malloc(struct_size);
65     if (fp == NULL) goto error;
66 
67     if (capacity == 0) capacity = 32768;
68     // FIXME For now, clamp input buffer sizes so mpileup doesn't eat memory
69     if (strchr(mode, 'r') && capacity > 32768) capacity = 32768;
70 
71     fp->buffer = (char *) malloc(capacity);
72     if (fp->buffer == NULL) goto error;
73 
74     fp->begin = fp->end = fp->buffer;
75     fp->limit = &fp->buffer[capacity];
76 
77     fp->offset = 0;
78     fp->at_eof = 0;
79     fp->has_errno = 0;
80     return fp;
81 
82 error:
83     hfile_destroy(fp);
84     return NULL;
85 }
86 
/* Frees a stream's I/O buffer and then the stream structure itself.  A NULL
   fp is accepted.  errno has the same value on return as it had on entry,
   so the function can be used on error paths without losing the original
   error code.  */
void hfile_destroy(hFILE *fp)
{
    const int saved_errno = errno;

    if (fp != NULL) {
        free(fp->buffer);
        free(fp);
    }

    errno = saved_errno;
}
94 
/* Returns non-zero when begin lies beyond end, which (per the field layout
   described at the top of this file) means written data is waiting in the
   buffer.  */
static inline int writebuffer_is_nonempty(hFILE *fp)
{
    return fp->end < fp->begin;
}
99 
100 /* Refills the read buffer from the backend (once, so may only partially
101    fill the buffer), returning the number of additional characters read
102    (which might be 0), or negative when an error occurred.  */
refill_buffer(hFILE * fp)103 static ssize_t refill_buffer(hFILE *fp)
104 {
105     ssize_t n;
106 
107     // Move any unread characters to the start of the buffer
108     if (fp->begin > fp->buffer) {
109         fp->offset += fp->begin - fp->buffer;
110         memmove(fp->buffer, fp->begin, fp->end - fp->begin);
111         fp->end = &fp->buffer[fp->end - fp->begin];
112         fp->begin = fp->buffer;
113     }
114 
115     // Read into the available buffer space at fp->[end,limit)
116     if (fp->at_eof || fp->end == fp->limit) n = 0;
117     else {
118         n = fp->backend->read(fp, fp->end, fp->limit - fp->end);
119         if (n < 0) { fp->has_errno = errno; return n; }
120         else if (n == 0) fp->at_eof = 1;
121     }
122 
123     fp->end += n;
124     return n;
125 }
126 
127 /* Called only from hgetc(), when our buffer is empty.  */
hgetc2(hFILE * fp)128 int hgetc2(hFILE *fp)
129 {
130     return (refill_buffer(fp) > 0)? (unsigned char) *(fp->begin++) : EOF;
131 }
132 
/* Copies up to nbytes of upcoming input into buffer without consuming it:
   begin is not advanced past the copied data.  The read buffer is refilled
   until it holds at least nbytes unread characters or a refill adds nothing
   (EOF seen, or no free buffer space).  Returns the number of bytes copied,
   which may be fewer than nbytes, or a negative value if a refill failed.  */
ssize_t hpeek(hFILE *fp, void *buffer, size_t nbytes)
{
    size_t avail = fp->end - fp->begin;
    size_t ncopy;

    while (avail < nbytes) {
        ssize_t got = refill_buffer(fp);
        if (got < 0) return got;
        if (got == 0) break;
        avail += got;
    }

    ncopy = (avail > nbytes)? nbytes : avail;
    memcpy(buffer, fp->begin, ncopy);
    return ncopy;
}
147 
148 /* Called only from hread(); when called, our buffer is empty and nread bytes
149    have already been placed in the destination buffer.  */
hread2(hFILE * fp,void * destv,size_t nbytes,size_t nread)150 ssize_t hread2(hFILE *fp, void *destv, size_t nbytes, size_t nread)
151 {
152     const size_t capacity = fp->limit - fp->buffer;
153     char *dest = (char *) destv;
154     dest += nread, nbytes -= nread;
155 
156     // Read large requests directly into the destination buffer
157     while (nbytes * 2 >= capacity && !fp->at_eof) {
158         ssize_t n = fp->backend->read(fp, dest, nbytes);
159         if (n < 0) { fp->has_errno = errno; return n; }
160         else if (n == 0) fp->at_eof = 1;
161         fp->offset += n;
162         dest += n, nbytes -= n;
163         nread += n;
164     }
165 
166     while (nbytes > 0 && !fp->at_eof) {
167         size_t n;
168         ssize_t ret = refill_buffer(fp);
169         if (ret < 0) return ret;
170 
171         n = fp->end - fp->begin;
172         if (n > nbytes) n = nbytes;
173         memcpy(dest, fp->begin, n);
174         fp->begin += n;
175         dest += n, nbytes -= n;
176         nread += n;
177     }
178 
179     return nread;
180 }
181 
182 /* Flushes the write buffer, fp->[buffer,begin), out through the backend
183    returning 0 on success or negative if an error occurred.  */
flush_buffer(hFILE * fp)184 static ssize_t flush_buffer(hFILE *fp)
185 {
186     const char *buffer = fp->buffer;
187     while (buffer < fp->begin) {
188         ssize_t n = fp->backend->write(fp, buffer, fp->begin - buffer);
189         if (n < 0) { fp->has_errno = errno; return n; }
190         buffer += n;
191         fp->offset += n;
192     }
193 
194     fp->begin = fp->buffer;  // Leave the buffer empty
195     return 0;
196 }
197 
/* Pushes any buffered output through the backend's write method and then
   asks the backend to flush its own state.

   Returns 0 on success, or EOF on failure with the error number recorded in
   fp->has_errno.

   A backend may leave its flush method NULL (the in-memory backend table in
   this file does); such a backend has nothing further to flush, so the
   call is skipped rather than made through a NULL pointer.  */
int hflush(hFILE *fp)
{
    // flush_buffer() records has_errno itself on failure
    if (flush_buffer(fp) < 0) return EOF;

    if (fp->backend->flush != NULL && fp->backend->flush(fp) < 0) {
        fp->has_errno = errno;
        return EOF;
    }
    return 0;
}
204 
205 /* Called only from hputc(), when our buffer is already full.  */
hputc2(int c,hFILE * fp)206 int hputc2(int c, hFILE *fp)
207 {
208     if (flush_buffer(fp) < 0) return EOF;
209     *(fp->begin++) = c;
210     return c;
211 }
212 
213 /* Called only from hwrite() and hputs2(); when called, our buffer is full and
214    ncopied bytes from the source have already been copied to our buffer.  */
hwrite2(hFILE * fp,const void * srcv,size_t totalbytes,size_t ncopied)215 ssize_t hwrite2(hFILE *fp, const void *srcv, size_t totalbytes, size_t ncopied)
216 {
217     const char *src = (const char *) srcv;
218     ssize_t ret;
219     const size_t capacity = fp->limit - fp->buffer;
220     size_t remaining = totalbytes - ncopied;
221     src += ncopied;
222 
223     ret = flush_buffer(fp);
224     if (ret < 0) return ret;
225 
226     // Write large blocks out directly from the source buffer
227     while (remaining * 2 >= capacity) {
228         ssize_t n = fp->backend->write(fp, src, remaining);
229         if (n < 0) { fp->has_errno = errno; return n; }
230         fp->offset += n;
231         src += n, remaining -= n;
232     }
233 
234     // Just buffer any remaining characters
235     memcpy(fp->begin, src, remaining);
236     fp->begin += remaining;
237 
238     return totalbytes;
239 }
240 
241 /* Called only from hputs(), when our buffer is already full.  */
hputs2(const char * text,size_t totalbytes,size_t ncopied,hFILE * fp)242 int hputs2(const char *text, size_t totalbytes, size_t ncopied, hFILE *fp)
243 {
244     return (hwrite2(fp, text, totalbytes, ncopied) >= 0)? 0 : EOF;
245 }
246 
/* Repositions the stream.  Pending written data is flushed first.  On
   success any buffered read data is discarded, the EOF flag is cleared, and
   the position reported by the backend is returned.  On failure a negative
   value is returned and the error number is recorded in has_errno.  */
off_t hseek(hFILE *fp, off_t offset, int whence)
{
    off_t newpos;

    if (writebuffer_is_nonempty(fp)) {
        int status = flush_buffer(fp);
        if (status < 0) return status;
    }
    else if (whence == SEEK_CUR) {
        // The caller's idea of "current" is at begin, but because of
        // read-ahead the backend's physical position is at end; rebase
        // the relative offset accordingly.
        offset -= fp->end - fp->begin;
    }

    newpos = fp->backend->seek(fp, offset, whence);
    if (newpos < 0) {
        fp->has_errno = errno;
        return newpos;
    }

    // The seek worked, so whatever was buffered for reading is stale
    fp->at_eof = 0;
    fp->offset = newpos;
    fp->begin = fp->end = fp->buffer;
    return newpos;
}
272 
/* Flushes any pending written data, closes the backend and destroys the
   stream.  Returns 0 if no error is outstanding.  Otherwise returns EOF with
   errno set to the most recent of: the error already recorded on the stream
   (has_errno), a failure of the final flush, or a failure of the backend
   close.  fp must not be used after this call.  */
int hclose(hFILE *fp)
{
    int err = fp->has_errno;

    if (writebuffer_is_nonempty(fp)) {
        if (hflush(fp) < 0) err = fp->has_errno;
    }

    // Close even after a failed flush so the backend is always released
    if (fp->backend->close(fp) < 0) err = errno;
    hfile_destroy(fp);

    if (err == 0) return 0;

    errno = err;
    return EOF;
}
287 
/* Closes the backend and destroys the stream without flushing buffered
   output.  Any failure of the backend close is ignored and errno is left as
   it was on entry.  fp must not be used after this call.  */
void hclose_abruptly(hFILE *fp)
{
    const int saved_errno = errno;
    /* Ignore subsequent errors */
    const int ignored = fp->backend->close(fp);

    (void) ignored;
    hfile_destroy(fp);
    errno = saved_errno;
}
295 
296 
297 /***************************
298  * File descriptor backend *
299  ***************************/
300 
301 // #include <sys/socket.h>
302 #include <sys/stat.h>
303 #include <fcntl.h>
304 #include <unistd.h>
305 
306 // #ifdef _WIN32
307 // #define HAVE_CLOSESOCKET
308 // #endif
309 
310 /* For Unix, it doesn't matter whether a file descriptor is a socket.
311    However Windows insists on send()/recv() and its own closesocket()
312    being used when fd happens to be a socket.  */
313 
/* Stream object for the file descriptor backend.  The generic hFILE is the
   first member, and the fd_* functions below cast the hFILE pointer they
   are given back to this type.  */
typedef struct {
    hFILE base;  // Generic stream state shared with the buffering layer
    int fd;      // Descriptor that the fd_* backend functions operate on
  // Socket flag, disabled here along with the socket code paths:
  // int is_socket:1;
} hFILE_fd;
319 
/* Backend read method: reads up to nbytes from the stream's descriptor into
   buffer with read(2), trying again whenever the call is interrupted
   (EINTR).  Returns read(2)'s result: the byte count, 0, or negative with
   errno set.  The recv()-based socket variant is not enabled here.  */
static ssize_t fd_read(hFILE *fpv, void *buffer, size_t nbytes)
{
    hFILE_fd *self = (hFILE_fd *) fpv;

    for (;;) {
        ssize_t got = read(self->fd, buffer, nbytes);
        if (got >= 0 || errno != EINTR) return got;
    }
}
333 
/* Backend write method: writes up to nbytes from buffer to the stream's
   descriptor with write(2), trying again whenever the call is interrupted
   (EINTR).  Returns write(2)'s result: the number of bytes accepted, or
   negative with errno set.  The send()-based socket variant is not enabled
   here.  */
static ssize_t fd_write(hFILE *fpv, const void *buffer, size_t nbytes)
{
    hFILE_fd *self = (hFILE_fd *) fpv;

    for (;;) {
        ssize_t put = write(self->fd, buffer, nbytes);
        if (put >= 0 || errno != EINTR) return put;
    }
}
347 
/* Backend seek method: forwards offset and whence to lseek(2) on the
   stream's descriptor and returns its result unchanged.  */
static off_t fd_seek(hFILE *fpv, off_t offset, int whence)
{
    const int fd = ((hFILE_fd *) fpv)->fd;
    return lseek(fd, offset, whence);
}
353 
/* Backend flush method: asks the operating system to commit the
   descriptor's data (FlushFileBuffers on Windows, fsync(2) elsewhere).
   Returns 0 on success or -1 with errno set on failure.  On the fsync path,
   EINVAL and ENOTSUP are treated as success and EINTR causes a retry.  */
static int fd_flush(hFILE *fpv)
{
    hFILE_fd *self = (hFILE_fd *) fpv;
#ifdef _WIN32
    // See the patch at
    // https://lists.gnu.org/archive/html/bug-gnulib/2008-10/msg00004.html .
    HANDLE handle = (HANDLE)_get_osfhandle(self->fd);

    if (handle == INVALID_HANDLE_VALUE) {
        errno = EBADF;
        return -1;
    }
    if (FlushFileBuffers(handle)) return 0;

    // Translate the Windows error into the closest errno value
    errno = (GetLastError() == ERROR_INVALID_HANDLE)? EINVAL : EIO;
    return -1;
#else
    for (;;) {
        int status = fsync(self->fd);
        if (status >= 0) return status;

        // Ignore invalid-for-fsync(2) errors due to being, e.g., a pipe,
        // and operation-not-supported errors (Mac OS X)
        if (errno == EINVAL || errno == ENOTSUP) return 0;

        if (errno != EINTR) return status;
    }
#endif
}
389 
/* Backend close method: closes the stream's descriptor, retrying while the
   call fails with EINTR.  Returns the final result of the close call (0 on
   success, negative with errno set on failure).

   NOTE(review): some systems release the descriptor even when close()
   reports EINTR, in which case a retry acts on a descriptor this stream no
   longer owns -- confirm that retrying is wanted on the target platforms.  */
static int fd_close(hFILE *fpv)
{
    hFILE_fd *self = (hFILE_fd *) fpv;
    int status;

    for (;;) {
#ifdef HAVE_CLOSESOCKET
        if (self->is_socket) status = closesocket(self->fd);
        else status = close(self->fd);
#else
        status = close(self->fd);
#endif
        if (status >= 0 || errno != EINTR) break;
    }
    return status;
}
403 
/* Method table for the file descriptor backend, bundling the fd_* functions
   defined above.  hopen_fd() and hdopen() install it as the backend of the
   streams they create.  */
static const struct hFILE_backend fd_backend =
{
    fd_read, fd_write, fd_seek, fd_flush, fd_close
};
408 
/* Suggests an I/O buffer size for fd: 512 on Windows, otherwise the
   st_blksize reported by fstat(2).  Returns 0 if fstat() fails; callers in
   this file pass the result to hfile_init() as its capacity argument.  */
static size_t blksize(int fd)
{
    struct stat info;
    size_t size = 0;

    if (fstat(fd, &info) == 0) {
#ifdef _WIN32
        size = 512;
#else
        size = info.st_blksize;
#endif
    }
    return size;
}
419 
hopen_fd(const char * filename,const char * mode)420 static hFILE *hopen_fd(const char *filename, const char *mode)
421 {
422     hFILE_fd *fp = NULL;
423     int fd = open(filename, hfile_oflags(mode), 0666);
424     if (fd < 0) goto error;
425 
426     fp = (hFILE_fd *) hfile_init(sizeof (hFILE_fd), mode, blksize(fd));
427     if (fp == NULL) goto error;
428 
429     fp->fd = fd;
430     // fp->is_socket = 0;
431     fp->base.backend = &fd_backend;
432     return &fp->base;
433 
434 error:
435     if (fd >= 0) { int save = errno; (void) close(fd); errno = save; }
436     hfile_destroy((hFILE *) fp);
437     return NULL;
438 }
439 
hdopen(int fd,const char * mode)440 hFILE *hdopen(int fd, const char *mode)
441 {
442     hFILE_fd *fp = (hFILE_fd*) hfile_init(sizeof (hFILE_fd), mode, blksize(fd));
443     if (fp == NULL) return NULL;
444 
445     fp->fd = fd;
446     // fp->is_socket = (strchr(mode, 's') != NULL);
447     fp->base.backend = &fd_backend;
448     return &fp->base;
449 }
450 
hopen_fd_stdinout(const char * mode)451 static hFILE *hopen_fd_stdinout(const char *mode)
452 {
453     int fd = (strchr(mode, 'r') != NULL)? STDIN_FILENO : STDOUT_FILENO;
454     // TODO Set binary mode (for Windows)
455     return hdopen(fd, mode);
456 }
457 
/* Translates an fopen-style mode string into open(2) flags.  Characters are
   processed left to right:

     'r'  access mode becomes O_RDONLY
     'w'  access mode becomes O_WRONLY; adds O_CREAT | O_TRUNC
     'a'  access mode becomes O_WRONLY; adds O_CREAT | O_APPEND
     '+'  access mode becomes O_RDWR

   Any other character is ignored.  The most recent access-mode character
   wins, while the creation flags accumulate.  O_BINARY is always added on
   platforms that define it.  */
int hfile_oflags(const char *mode)
{
    int accmode = 0, extra = 0;
    size_t i;

    for (i = 0; mode[i] != '\0'; i++) {
        const char c = mode[i];

        if (c == 'r') {
            accmode = O_RDONLY;
        }
        else if (c == 'w') {
            accmode = O_WRONLY;
            extra |= O_CREAT | O_TRUNC;
        }
        else if (c == 'a') {
            accmode = O_WRONLY;
            extra |= O_CREAT | O_APPEND;
        }
        else if (c == '+') {
            accmode = O_RDWR;
        }
    }

#ifdef O_BINARY
    extra |= O_BINARY;
#endif

    return accmode | extra;
}
477 
478 
479 /*********************
480  * In-memory backend *
481  *********************/
482 
/* Stream object for the in-memory backend.  The mem_* functions that use it
   are currently commented out below, so the notes on the fields describe
   how that disabled code treats them.  */
typedef struct {
    hFILE base;           // Generic stream state
    const char *buffer;   // Data to read from, as passed to hopen_mem()
    size_t length, pos;   // Size of the data, and the current read position
} hFILE_mem;
488 
489 /*
490 static ssize_t mem_read(hFILE *fpv, void *buffer, size_t nbytes)
491 {
492     hFILE_mem *fp = (hFILE_mem *) fpv;
493     size_t avail = fp->length - fp->pos;
494     if (nbytes > avail) nbytes = avail;
495     memcpy(buffer, fp->buffer + fp->pos, nbytes);
496     fp->pos += nbytes;
497     return nbytes;
498 }
499 
500 static off_t mem_seek(hFILE *fpv, off_t offset, int whence)
501 {
502     hFILE_mem *fp = (hFILE_mem *) fpv;
503     size_t absoffset = (offset >= 0)? offset : -offset;
504     size_t origin;
505 
506     switch (whence) {
507     case SEEK_SET: origin = 0; break;
508     case SEEK_CUR: origin = fp->pos; break;
509     case SEEK_END: origin = fp->length; break;
510     default: errno = EINVAL; return -1;
511     }
512 
513     if ((offset  < 0 && absoffset > origin) ||
514         (offset >= 0 && absoffset > fp->length - origin)) {
515         errno = EINVAL;
516         return -1;
517     }
518 
519     fp->pos = origin + offset;
520     return fp->pos;
521 }
522 
523 static int mem_close(hFILE *fpv)
524 {
525     return 0;
526 }
527 
528 static const struct hFILE_backend mem_backend =
529 {
530     mem_read, NULL, mem_seek, NULL, mem_close
531 };
532 
533 static hFILE *hopen_mem(const char *data, const char *mode)
534 {
535     // TODO Implement write modes, which will require memory allocation
536     if (strchr(mode, 'r') == NULL) { errno = EINVAL; return NULL; }
537 
538     hFILE_mem *fp = (hFILE_mem *) hfile_init(sizeof (hFILE_mem), mode, 0);
539     if (fp == NULL) return NULL;
540 
541     fp->buffer = data;
542     fp->length = strlen(data);
543     fp->pos = 0;
544     fp->base.backend = &mem_backend;
545     return &fp->base;
546 }
547 */
548 
549 
550 /******************************
551  * hopen() backend dispatcher *
552  ******************************/
553 
hopen(const char * fname,const char * mode)554 hFILE *hopen(const char *fname, const char *mode)
555 {
556   // if (strncmp(fname, "http://", 7) == 0 ||
557   //      strncmp(fname, "ftp://", 6) == 0) return hopen_net(fname, mode);
558 #ifdef HAVE_IRODS
559   // else if (strncmp(fname, "irods:", 6) == 0) return hopen_irods(fname, mode);
560 #endif
561   // else if (strncmp(fname, "data:", 5) == 0) return hopen_mem(fname + 5, mode);
562     if (strcmp(fname, "-") == 0) return hopen_fd_stdinout(mode);
563     else return hopen_fd(fname, mode);
564 }
565 
566 /*
567 int hisremote(const char *fname)
568 {
569     // FIXME Make a new backend entry to return this
570     if (strncmp(fname, "http://", 7) == 0 ||
571         strncmp(fname, "https://", 8) == 0 ||
572         strncmp(fname, "ftp://", 6) == 0) return 1;
573 #ifdef HAVE_IRODS
574     else if (strncmp(fname, "irods:", 6) == 0) return 1;
575 #endif
576     else return 0;
577 }
578 */
579