1 /* hfile.c -- buffered low-level input/output streams.
2
3 Copyright (C) 2013-2015 Genome Research Ltd.
4
5 Author: John Marshall <jm18@sanger.ac.uk>
6
7 Permission is hereby granted, free of charge, to any person obtaining a copy
8 of this software and associated documentation files (the "Software"), to deal
9 in the Software without restriction, including without limitation the rights
10 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 copies of the Software, and to permit persons to whom the Software is
12 furnished to do so, subject to the following conditions:
13
14 The above copyright notice and this permission notice shall be included in
15 all copies or substantial portions of the Software.
16
17 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 DEALINGS IN THE SOFTWARE. */
24
25 #include "plink_common.h"
26 #include <errno.h>
27
28 #include "hfile.h"
29 #include "hfile_internal.h"
30
31 /* hFILE fields are used as follows:
32
33 char *buffer; // Pointer to the start of the I/O buffer
34 char *begin; // First not-yet-read character / unused position
35 char *end; // First unfilled/unfillable position
36 char *limit; // Pointer to the first position past the buffer
37
38 const hFILE_backend *backend; // Methods to refill/flush I/O buffer
39
40 off_t offset; // Offset within the stream of buffer position 0
41 int at_eof:1; // For reading, whether EOF has been seen
42 int has_errno; // Error number from the last failure on this stream
43
44 For reading, begin is the first unread character in the buffer and end is the
45 first unfilled position:
46
   -----------ABCDEFGHIJKLMNO---------------
   ^buffer    ^begin         ^end           ^limit
49
50 For writing, begin is the first unused position and end is unused so remains
51 equal to buffer:
52
   ABCDEFGHIJKLMNOPQRSTUVWXYZ---------------
   ^buffer                   ^begin         ^limit
   ^end
56
57 Thus if begin > end then there is a non-empty write buffer, if begin < end
58 then there is a non-empty read buffer, and if begin == end then both buffers
59 are empty. In all cases, the stream's file position indicator corresponds
60 to the position pointed to by begin. */
61
hfile_init(size_t struct_size,const char * mode,size_t capacity)62 hFILE *hfile_init(size_t struct_size, const char *mode, size_t capacity)
63 {
64 hFILE *fp = (hFILE *) malloc(struct_size);
65 if (fp == NULL) goto error;
66
67 if (capacity == 0) capacity = 32768;
68 // FIXME For now, clamp input buffer sizes so mpileup doesn't eat memory
69 if (strchr(mode, 'r') && capacity > 32768) capacity = 32768;
70
71 fp->buffer = (char *) malloc(capacity);
72 if (fp->buffer == NULL) goto error;
73
74 fp->begin = fp->end = fp->buffer;
75 fp->limit = &fp->buffer[capacity];
76
77 fp->offset = 0;
78 fp->at_eof = 0;
79 fp->has_errno = 0;
80 return fp;
81
82 error:
83 hfile_destroy(fp);
84 return NULL;
85 }
86
/* Releases the stream's buffer and the stream object itself without touching
   the backend.  Accepts NULL, and leaves errno exactly as it found it.  */
void hfile_destroy(hFILE *fp)
{
    const int saved_errno = errno;

    if (fp != NULL) {
        free(fp->buffer);
        free(fp);
    }

    errno = saved_errno;
}
94
/* Returns non-zero when begin lies beyond end, i.e. when there are buffered
   characters waiting to be written.  */
static inline int writebuffer_is_nonempty(hFILE *fp)
{
    return fp->end < fp->begin;
}
99
100 /* Refills the read buffer from the backend (once, so may only partially
101 fill the buffer), returning the number of additional characters read
102 (which might be 0), or negative when an error occurred. */
refill_buffer(hFILE * fp)103 static ssize_t refill_buffer(hFILE *fp)
104 {
105 ssize_t n;
106
107 // Move any unread characters to the start of the buffer
108 if (fp->begin > fp->buffer) {
109 fp->offset += fp->begin - fp->buffer;
110 memmove(fp->buffer, fp->begin, fp->end - fp->begin);
111 fp->end = &fp->buffer[fp->end - fp->begin];
112 fp->begin = fp->buffer;
113 }
114
115 // Read into the available buffer space at fp->[end,limit)
116 if (fp->at_eof || fp->end == fp->limit) n = 0;
117 else {
118 n = fp->backend->read(fp, fp->end, fp->limit - fp->end);
119 if (n < 0) { fp->has_errno = errno; return n; }
120 else if (n == 0) fp->at_eof = 1;
121 }
122
123 fp->end += n;
124 return n;
125 }
126
127 /* Called only from hgetc(), when our buffer is empty. */
hgetc2(hFILE * fp)128 int hgetc2(hFILE *fp)
129 {
130 return (refill_buffer(fp) > 0)? (unsigned char) *(fp->begin++) : EOF;
131 }
132
/* Copies up to nbytes of upcoming data into buffer without consuming it,
   refilling the read buffer as often as needed.  Returns the number of
   bytes copied, which is smaller than nbytes when the stream ends or the
   internal buffer cannot hold more, or a negative value on a read error.  */
ssize_t hpeek(hFILE *fp, void *buffer, size_t nbytes)
{
    size_t avail = fp->end - fp->begin;
    size_t ncopy;

    for (;;) {
        ssize_t got;

        if (avail >= nbytes) break;

        got = refill_buffer(fp);
        if (got < 0) return got;
        if (got == 0) break;    // EOF, or no room left in the buffer
        avail += got;
    }

    ncopy = (avail > nbytes)? nbytes : avail;
    memcpy(buffer, fp->begin, ncopy);
    return ncopy;
}
147
148 /* Called only from hread(); when called, our buffer is empty and nread bytes
149 have already been placed in the destination buffer. */
hread2(hFILE * fp,void * destv,size_t nbytes,size_t nread)150 ssize_t hread2(hFILE *fp, void *destv, size_t nbytes, size_t nread)
151 {
152 const size_t capacity = fp->limit - fp->buffer;
153 char *dest = (char *) destv;
154 dest += nread, nbytes -= nread;
155
156 // Read large requests directly into the destination buffer
157 while (nbytes * 2 >= capacity && !fp->at_eof) {
158 ssize_t n = fp->backend->read(fp, dest, nbytes);
159 if (n < 0) { fp->has_errno = errno; return n; }
160 else if (n == 0) fp->at_eof = 1;
161 fp->offset += n;
162 dest += n, nbytes -= n;
163 nread += n;
164 }
165
166 while (nbytes > 0 && !fp->at_eof) {
167 size_t n;
168 ssize_t ret = refill_buffer(fp);
169 if (ret < 0) return ret;
170
171 n = fp->end - fp->begin;
172 if (n > nbytes) n = nbytes;
173 memcpy(dest, fp->begin, n);
174 fp->begin += n;
175 dest += n, nbytes -= n;
176 nread += n;
177 }
178
179 return nread;
180 }
181
182 /* Flushes the write buffer, fp->[buffer,begin), out through the backend
183 returning 0 on success or negative if an error occurred. */
flush_buffer(hFILE * fp)184 static ssize_t flush_buffer(hFILE *fp)
185 {
186 const char *buffer = fp->buffer;
187 while (buffer < fp->begin) {
188 ssize_t n = fp->backend->write(fp, buffer, fp->begin - buffer);
189 if (n < 0) { fp->has_errno = errno; return n; }
190 buffer += n;
191 fp->offset += n;
192 }
193
194 fp->begin = fp->buffer; // Leave the buffer empty
195 return 0;
196 }
197
/* Writes out any buffered characters and then asks the backend to perform
   its own low-level flush.  Returns 0 on success or EOF on failure; when
   the backend's flush method fails, errno is recorded in has_errno.

   A backend that provides no flush method (a NULL entry in its method
   table) is treated as having nothing further to flush, rather than being
   called through a null pointer.  */
int hflush(hFILE *fp)
{
    if (flush_buffer(fp) < 0) return EOF;

    if (fp->backend->flush != NULL && fp->backend->flush(fp) < 0) {
        fp->has_errno = errno;
        return EOF;
    }

    return 0;
}
204
205 /* Called only from hputc(), when our buffer is already full. */
hputc2(int c,hFILE * fp)206 int hputc2(int c, hFILE *fp)
207 {
208 if (flush_buffer(fp) < 0) return EOF;
209 *(fp->begin++) = c;
210 return c;
211 }
212
213 /* Called only from hwrite() and hputs2(); when called, our buffer is full and
214 ncopied bytes from the source have already been copied to our buffer. */
hwrite2(hFILE * fp,const void * srcv,size_t totalbytes,size_t ncopied)215 ssize_t hwrite2(hFILE *fp, const void *srcv, size_t totalbytes, size_t ncopied)
216 {
217 const char *src = (const char *) srcv;
218 ssize_t ret;
219 const size_t capacity = fp->limit - fp->buffer;
220 size_t remaining = totalbytes - ncopied;
221 src += ncopied;
222
223 ret = flush_buffer(fp);
224 if (ret < 0) return ret;
225
226 // Write large blocks out directly from the source buffer
227 while (remaining * 2 >= capacity) {
228 ssize_t n = fp->backend->write(fp, src, remaining);
229 if (n < 0) { fp->has_errno = errno; return n; }
230 fp->offset += n;
231 src += n, remaining -= n;
232 }
233
234 // Just buffer any remaining characters
235 memcpy(fp->begin, src, remaining);
236 fp->begin += remaining;
237
238 return totalbytes;
239 }
240
241 /* Called only from hputs(), when our buffer is already full. */
hputs2(const char * text,size_t totalbytes,size_t ncopied,hFILE * fp)242 int hputs2(const char *text, size_t totalbytes, size_t ncopied, hFILE *fp)
243 {
244 return (hwrite2(fp, text, totalbytes, ncopied) >= 0)? 0 : EOF;
245 }
246
/* Repositions the stream.  Pending output is flushed first; otherwise a
   SEEK_CUR offset is corrected for read-ahead already sitting in the
   buffer.  On success the buffer is emptied, the EOF flag is cleared and
   the new absolute position is returned.  On failure a negative value is
   returned, and a backend seek error is recorded in has_errno.  */
off_t hseek(hFILE *fp, off_t offset, int whence)
{
    off_t newpos;

    if (!writebuffer_is_nonempty(fp)) {
        // The backend's physical position is at end because of read-ahead,
        // whereas the caller's position is at begin, so translate relative
        // offsets from one to the other.
        if (whence == SEEK_CUR) offset -= fp->end - fp->begin;
    }
    else {
        int status = flush_buffer(fp);
        if (status < 0) return status;
    }

    newpos = fp->backend->seek(fp, offset, whence);
    if (newpos < 0) {
        fp->has_errno = errno;
        return newpos;
    }

    // The seek worked, so whatever was buffered for reading is now stale
    fp->at_eof = 0;
    fp->offset = newpos;
    fp->begin = fp->end = fp->buffer;
    return newpos;
}
272
/* Flushes any pending output, closes the backend and frees the stream.
   Returns 0 if the stream never recorded an error and both the flush and
   the close succeeded; otherwise sets errno to the most recent of those
   errors and returns EOF.  The stream is destroyed in either case.  */
int hclose(hFILE *fp)
{
    int err = fp->has_errno;

    if (writebuffer_is_nonempty(fp)) {
        if (hflush(fp) < 0) err = fp->has_errno;
    }
    if (fp->backend->close(fp) < 0) err = errno;
    hfile_destroy(fp);

    if (err == 0) return 0;

    errno = err;
    return EOF;
}
287
/* Closes the backend and frees the stream without flushing pending output.
   Any error from closing is ignored and errno is left unchanged.  */
void hclose_abruptly(hFILE *fp)
{
    const int saved_errno = errno;
    int rc;

    // The result is captured and discarded: close failures are ignored here
    rc = fp->backend->close(fp);
    (void) rc;

    hfile_destroy(fp);
    errno = saved_errno;
}
295
296
297 /***************************
298 * File descriptor backend *
299 ***************************/
300
301 // #include <sys/socket.h>
302 #include <sys/stat.h>
303 #include <fcntl.h>
304 #include <unistd.h>
305
306 // #ifdef _WIN32
307 // #define HAVE_CLOSESOCKET
308 // #endif
309
310 /* For Unix, it doesn't matter whether a file descriptor is a socket.
311 However Windows insists on send()/recv() and its own closesocket()
312 being used when fd happens to be a socket. */
313
/* Stream object used by the file descriptor backend.  The fd_* functions
   receive an hFILE pointer and cast it to hFILE_fd, so base has to remain
   the first member.  */
typedef struct {
    hFILE base;  // Generic stream state shared by all backends
    int fd;      // Descriptor handed to read/write/lseek/fsync/close
    // int is_socket:1;
} hFILE_fd;
319
/* Backend read method: read(2) on the stream's descriptor, restarted for as
   long as it is interrupted by a signal (EINTR).  Returns what read(2)
   returns.  The recv()-based socket path is disabled in this copy.  */
static ssize_t fd_read(hFILE *fpv, void *buffer, size_t nbytes)
{
    hFILE_fd *fp = (hFILE_fd *) fpv;

    for (;;) {
        ssize_t n = read(fp->fd, buffer, nbytes);
        if (n >= 0 || errno != EINTR) return n;
    }
}
333
/* Backend write method: write(2) on the stream's descriptor, restarted for
   as long as it is interrupted by a signal (EINTR).  Returns what write(2)
   returns, which may be fewer bytes than requested.  The send()-based
   socket path is disabled in this copy.  */
static ssize_t fd_write(hFILE *fpv, const void *buffer, size_t nbytes)
{
    hFILE_fd *fp = (hFILE_fd *) fpv;

    for (;;) {
        ssize_t n = write(fp->fd, buffer, nbytes);
        if (n >= 0 || errno != EINTR) return n;
    }
}
347
/* Backend seek method: forwards directly to lseek(2) on the stream's
   descriptor and returns its result.  */
static off_t fd_seek(hFILE *fpv, off_t offset, int whence)
{
    const int fd = ((hFILE_fd *) fpv)->fd;
    return lseek(fd, offset, whence);
}
353
/* Backend flush method: asks the operating system to commit the
   descriptor's data (FlushFileBuffers on Windows, fsync(2) elsewhere).
   Returns 0 on success or -1 with errno set on failure.  */
static int fd_flush(hFILE *fpv)
{
    hFILE_fd *fp = (hFILE_fd *) fpv;
#ifdef _WIN32
    // See the patch at
    // https://lists.gnu.org/archive/html/bug-gnulib/2008-10/msg00004.html .
    HANDLE handle = (HANDLE)_get_osfhandle(fp->fd);

    if (handle == INVALID_HANDLE_VALUE) {
        errno = EBADF;
        return -1;
    }
    if (FlushFileBuffers(handle)) return 0;

    // Map the Windows error onto the closest errno value
    errno = (GetLastError() == ERROR_INVALID_HANDLE)? EINVAL : EIO;
    return -1;
#else
    for (;;) {
        int ret = fsync(fp->fd);
        if (ret >= 0) return ret;

        // Ignore invalid-for-fsync(2) errors due to being, e.g., a pipe,
        // and operation-not-supported errors (Mac OS X)
        if (errno == EINVAL || errno == ENOTSUP) return 0;

        // Anything other than an interrupted call is a real failure
        if (errno != EINTR) return ret;
    }
#endif
}
389
/* Backend close method: closes the stream's descriptor, calling close(2)
   again for as long as it fails with EINTR.  Returns the result of the
   final close call.

   NOTE(review): POSIX leaves the state of the descriptor unspecified when
   close() fails with EINTR; confirm that retrying is what is wanted on the
   platforms this is built for.  */
static int fd_close(hFILE *fpv)
{
    hFILE_fd *fp = (hFILE_fd *) fpv;

    for (;;) {
#ifdef HAVE_CLOSESOCKET
        int ret = fp->is_socket? closesocket(fp->fd) : close(fp->fd);
#else
        int ret = close(fp->fd);
#endif
        if (ret >= 0 || errno != EINTR) return ret;
    }
}
403
/* Method table for file descriptor streams; hopen_fd() and hdopen() install
   it as the stream's backend.  The initialisers are positional, so their
   order has to match the member order of struct hFILE_backend, which is
   declared outside this file.  */
static const struct hFILE_backend fd_backend =
{
    fd_read, fd_write, fd_seek, fd_flush, fd_close
};
408
/* Suggests a buffer size for fd: the st_blksize reported by fstat(2), or a
   fixed 512 on Windows.  Returns 0 when fstat() fails, which hfile_init()
   treats as a request for its default capacity.  */
static size_t blksize(int fd)
{
    struct stat info;
    size_t size = 0;

    if (fstat(fd, &info) == 0) {
#ifdef _WIN32
        size = 512;
#else
        size = info.st_blksize;
#endif
    }

    return size;
}
419
hopen_fd(const char * filename,const char * mode)420 static hFILE *hopen_fd(const char *filename, const char *mode)
421 {
422 hFILE_fd *fp = NULL;
423 int fd = open(filename, hfile_oflags(mode), 0666);
424 if (fd < 0) goto error;
425
426 fp = (hFILE_fd *) hfile_init(sizeof (hFILE_fd), mode, blksize(fd));
427 if (fp == NULL) goto error;
428
429 fp->fd = fd;
430 // fp->is_socket = 0;
431 fp->base.backend = &fd_backend;
432 return &fp->base;
433
434 error:
435 if (fd >= 0) { int save = errno; (void) close(fd); errno = save; }
436 hfile_destroy((hFILE *) fp);
437 return NULL;
438 }
439
hdopen(int fd,const char * mode)440 hFILE *hdopen(int fd, const char *mode)
441 {
442 hFILE_fd *fp = (hFILE_fd*) hfile_init(sizeof (hFILE_fd), mode, blksize(fd));
443 if (fp == NULL) return NULL;
444
445 fp->fd = fd;
446 // fp->is_socket = (strchr(mode, 's') != NULL);
447 fp->base.backend = &fd_backend;
448 return &fp->base;
449 }
450
hopen_fd_stdinout(const char * mode)451 static hFILE *hopen_fd_stdinout(const char *mode)
452 {
453 int fd = (strchr(mode, 'r') != NULL)? STDIN_FILENO : STDOUT_FILENO;
454 // TODO Set binary mode (for Windows)
455 return hdopen(fd, mode);
456 }
457
/* Translates an fopen(3)-style mode string into open(2) flags.  Characters
   are processed left to right: 'r', 'w' and 'a' select the access mode
   ('w' and 'a' also add creation plus truncate/append), '+' switches the
   access mode to read-write, and any other character is ignored.  O_BINARY
   is always added on platforms that define it.  */
int hfile_oflags(const char *mode)
{
    int acc_mode = 0, extra = 0;
    const char *p = mode;

    while (*p != '\0') {
        const char c = *p++;

        if (c == 'r') {
            acc_mode = O_RDONLY;
        }
        else if (c == 'w') {
            acc_mode = O_WRONLY;
            extra |= O_CREAT | O_TRUNC;
        }
        else if (c == 'a') {
            acc_mode = O_WRONLY;
            extra |= O_CREAT | O_APPEND;
        }
        else if (c == '+') {
            acc_mode = O_RDWR;
        }
    }

#ifdef O_BINARY
    extra |= O_BINARY;
#endif

    return acc_mode | extra;
}
477
478
479 /*********************
480 * In-memory backend *
481 *********************/
482
/* Stream object for the in-memory backend.  In the visible part of this
   file it is referenced only by the commented-out mem_* functions below,
   so it is currently unused.  */
typedef struct {
    hFILE base;          // Generic stream state shared by all backends
    const char *buffer;  // The in-memory data to read from
    size_t length, pos;  // Size of the data and the current read position
} hFILE_mem;
488
489 /*
490 static ssize_t mem_read(hFILE *fpv, void *buffer, size_t nbytes)
491 {
492 hFILE_mem *fp = (hFILE_mem *) fpv;
493 size_t avail = fp->length - fp->pos;
494 if (nbytes > avail) nbytes = avail;
495 memcpy(buffer, fp->buffer + fp->pos, nbytes);
496 fp->pos += nbytes;
497 return nbytes;
498 }
499
500 static off_t mem_seek(hFILE *fpv, off_t offset, int whence)
501 {
502 hFILE_mem *fp = (hFILE_mem *) fpv;
503 size_t absoffset = (offset >= 0)? offset : -offset;
504 size_t origin;
505
506 switch (whence) {
507 case SEEK_SET: origin = 0; break;
508 case SEEK_CUR: origin = fp->pos; break;
509 case SEEK_END: origin = fp->length; break;
510 default: errno = EINVAL; return -1;
511 }
512
513 if ((offset < 0 && absoffset > origin) ||
514 (offset >= 0 && absoffset > fp->length - origin)) {
515 errno = EINVAL;
516 return -1;
517 }
518
519 fp->pos = origin + offset;
520 return fp->pos;
521 }
522
523 static int mem_close(hFILE *fpv)
524 {
525 return 0;
526 }
527
528 static const struct hFILE_backend mem_backend =
529 {
530 mem_read, NULL, mem_seek, NULL, mem_close
531 };
532
533 static hFILE *hopen_mem(const char *data, const char *mode)
534 {
535 // TODO Implement write modes, which will require memory allocation
536 if (strchr(mode, 'r') == NULL) { errno = EINVAL; return NULL; }
537
538 hFILE_mem *fp = (hFILE_mem *) hfile_init(sizeof (hFILE_mem), mode, 0);
539 if (fp == NULL) return NULL;
540
541 fp->buffer = data;
542 fp->length = strlen(data);
543 fp->pos = 0;
544 fp->base.backend = &mem_backend;
545 return &fp->base;
546 }
547 */
548
549
550 /******************************
551 * hopen() backend dispatcher *
552 ******************************/
553
hopen(const char * fname,const char * mode)554 hFILE *hopen(const char *fname, const char *mode)
555 {
556 // if (strncmp(fname, "http://", 7) == 0 ||
557 // strncmp(fname, "ftp://", 6) == 0) return hopen_net(fname, mode);
558 #ifdef HAVE_IRODS
559 // else if (strncmp(fname, "irods:", 6) == 0) return hopen_irods(fname, mode);
560 #endif
561 // else if (strncmp(fname, "data:", 5) == 0) return hopen_mem(fname + 5, mode);
562 if (strcmp(fname, "-") == 0) return hopen_fd_stdinout(mode);
563 else return hopen_fd(fname, mode);
564 }
565
566 /*
567 int hisremote(const char *fname)
568 {
569 // FIXME Make a new backend entry to return this
570 if (strncmp(fname, "http://", 7) == 0 ||
571 strncmp(fname, "https://", 8) == 0 ||
572 strncmp(fname, "ftp://", 6) == 0) return 1;
573 #ifdef HAVE_IRODS
574 else if (strncmp(fname, "irods:", 6) == 0) return 1;
575 #endif
576 else return 0;
577 }
578 */
579