1 /* vim: set ts=8 sts=4 sw=4 tw=80 noet: */
2 /*======================================================================
3 Copyright (C) 2004,2005,2009,2013 Walter Doekes <walter+tthsum@wjd.nu>
4 This file is part of tthsum.
5 
6 tthsum is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
10 
11 tthsum is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 GNU General Public License for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with tthsum.  If not, see <http://www.gnu.org/licenses/>.
18 ======================================================================*/
19 #include "read.h"
20 
21 #include <sys/stat.h>
22 #include <errno.h>
23 #include <fcntl.h>
24 #include <limits.h>
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
28 
/* Platform glue: pull in the OS headers and paper over naming differences. */
#ifdef _WIN32
#   define STDIN_FILENO fileno(stdin) /* fileno is in <stdio.h> */
    /* WIN32_LEAN_AND_MEAN is the macro <windows.h> actually looks at to skip
     * its rarely used sub-headers; guard it in case the build sets it too. */
#   ifndef WIN32_LEAN_AND_MEAN
#	define WIN32_LEAN_AND_MEAN
#   endif
#   include <windows.h>
#else /* !_WIN32 */
#   include <unistd.h>
#   include <sys/types.h>
#   include <sys/mman.h>
#   define O_BINARY 0 /* no text/binary distinction here; flag is a no-op */
#endif /* !_WIN32 */
39 
40 #ifdef USE_TEXTS
41 #   include "texts.h"
42 #endif
43 
/* Block size used by the MEM and SYS backends; must be a multiple of 1024. */
#define DEFAULT_BLOCK_SIZE (8 * 1024)
45 
46 
/* Selects which backend (and which member of rofile.u) a stream uses. */
enum rofile_type {
    MEM,  /* private in-memory copy of caller supplied data */
    MMAP, /* file that is mapped one window at a time */
    SYS   /* file descriptor read with read() into a scratch buffer */
};
48 
/* Per-stream state of the MEM backend. */
struct rofile_mem {
    /* Heap copy of the caller's data; released when the stream is closed. */
    char* buf;
    /* First byte inside buf that has not been handed out yet. */
    const char* cur;
    /* Number of bytes remaining from cur onwards. */
    unsigned left;
};
54 
/* Per-stream state of the MMAP backend. */
struct rofile_mmap {
#ifdef _WIN32
    /* Handle of the opened file. */
    HANDLE fd;
    /* File-mapping object; NULL for an empty file. */
    HANDLE map;
    /* Currently mapped view, or NULL when none is mapped. */
    LPVOID view;
#else /* !_WIN32 */
    /* Descriptor of the opened file. */
    int fd;
    /* Address of the current mapping, or NULL when none is mapped. */
    void* map;
    /* Length of the current mapping; needed to munmap() it again. */
    unsigned last;
#endif /* !_WIN32 */
    /* File offset at which the next window starts. */
    uint64_t off;
    /* Bytes of the file that have not been handed out yet. */
    uint64_t left;
};
68 
/* Per-stream state of the SYS backend. */
struct rofile_sys {
    /* Descriptor that is read from. */
    int fd;
    /* Non-zero when the stream owns fd and must close it afterwards. */
    int close_fd;
#ifdef _WIN32
    /* Mode (bin/txt) fd had before it was switched to binary. */
    int fdmode;
#endif /* _WIN32 */
    /* Set once read() has reported end-of-file. */
    int done;
    /* Scratch buffer of rof_blocksize bytes that read() fills. */
    char* buf;
};
78 
79 struct rofile {
80     enum rofile_type rof_type;
81     unsigned rof_blocksize;
82     uint64_t rof_filesize;
83     union {
84 	struct rofile_mem rof_mem;
85 	struct rofile_mmap rof_mmap;
86 	struct rofile_sys rof_sys;
87     } u; /* ansi does not allow anonymous unions */
88 };
89 
90 
close_or_warn(int fd)91 _INLINE static void close_or_warn(int fd) {
92     /* Send warning to stderr but ignore failure...
93      * After all, we did only read bytes, not write any. */
94     if (close(fd) != 0)
95 	perror("rofclose: closing fd failed");
96 }
97 
rofopen_mem(const char * data,unsigned length)98 struct rofile* rofopen_mem(const char* data, unsigned length) {
99     struct rofile* rf = NULL;
100     rf = (struct rofile*)malloc(sizeof(struct rofile));
101     if (rf != NULL) {
102 	rf->rof_type = MEM;
103 	rf->rof_blocksize = DEFAULT_BLOCK_SIZE;
104 	rf->rof_filesize = length;
105 	rf->u.rof_mem.buf = (char*)malloc(length);
106 	if (rf->u.rof_mem.buf != NULL) {
107 	    memcpy(rf->u.rof_mem.buf, data, length);
108 	    rf->u.rof_mem.cur = rf->u.rof_mem.buf;
109 	    rf->u.rof_mem.left = length;
110 	    return rf;
111 	} else {
112 #ifdef USE_TEXTS
113 	    set_error("malloc", ERROR_FROM_OS);
114 #endif /* USE_TEXTS */
115 	}
116 	free(rf);
117     } else {
118 #ifdef USE_TEXTS
119 	set_error("malloc", ERROR_FROM_OS);
120 #endif /* USE_TEXTS */
121     }
122     return NULL;
123 }
124 
rofopen_mmap(const char * filename)125 struct rofile* rofopen_mmap(const char* filename) {
126     struct rofile* rf = NULL;
127 #ifdef _WIN32
128     SYSTEM_INFO si;
129     LARGE_INTEGER li;
130     rf = (struct rofile*)malloc(sizeof(struct rofile));
131     if (rf == NULL) {
132 #ifdef USE_TEXTS
133 	set_error("malloc", ERROR_FROM_OS);
134 #endif /* USE_TEXTS */
135 	return NULL;
136     }
137     rf->rof_type = MMAP;
138     GetSystemInfo(&si);
139     /* 0x1000000 = 2^24 = 1.6MiB */
140     rf->rof_blocksize = 0x1000000 - (0x1000000 % si.dwAllocationGranularity);
141     rf->u.rof_mmap.fd = CreateFile(filename, GENERIC_READ, FILE_SHARE_READ,
142 	    NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
143     if (rf->u.rof_mmap.fd != INVALID_HANDLE_VALUE) {
144 	if (GetFileSizeEx(rf->u.rof_mmap.fd, &li) == TRUE) {
145 	    rf->rof_filesize = (uint64_t)li.QuadPart;
146 	    rf->u.rof_mmap.left = rf->rof_filesize;
147 	    rf->u.rof_mmap.off = 0;
148 	    rf->u.rof_mmap.view = NULL;
149 	    /* windows fails to map empty files :-/ */
150 	    if (rf->rof_filesize != 0) {
151 		rf->u.rof_mmap.map = CreateFileMapping(rf->u.rof_mmap.fd, NULL,
152 			PAGE_READONLY, 0, 0, NULL);
153 		if (rf->u.rof_mmap.map != NULL)
154 		    return rf;
155 #ifdef USE_TEXTS
156 		else
157 		    set_error("CreateFileMapping", ERROR_FROM_OS);
158 #endif /* USE_TEXTS */
159 	    } else {
160 		rf->u.rof_mmap.map = NULL;
161 		return rf;
162 	    }
163 #ifdef USE_TEXTS
164 	} else {
165 	    set_error("GetFileSizeEx", ERROR_FROM_OS);
166 #endif /* USE_TEXTS */
167 	}
168 	CloseHandle(rf->u.rof_mmap.fd);
169 #ifdef USE_TEXTS
170     } else {
171 	set_error("CreateFile", ERROR_FROM_OS);
172 #endif /* USE_TEXTS */
173     }
174 #else /* !_WIN32 */
175     struct stat st;
176     rf = (struct rofile*)malloc(sizeof(struct rofile));
177     if (rf != NULL) {
178 	rf->rof_type = MMAP;
179 	rf->u.rof_mmap.fd = open(filename, O_RDONLY | O_BINARY);
180 	if (rf->u.rof_mmap.fd != -1) {
181 	    if (fstat(rf->u.rof_mmap.fd, &st) != -1) {
182 		rf->rof_filesize = rf->u.rof_mmap.left = (uint64_t)st.st_size;
183 		/* 0x1000000 = 2^24 = 1.6MiB */
184 		rf->rof_blocksize = 0x1000000;
185 #if defined(_BSD_SOURCE) || _XOPEN_SOURCE >= 500
186 		rf->rof_blocksize -= 0x1000000 % getpagesize();
187 #endif /* _BSD_SOURCE || _XOPEN_SOURCE >= 500 */
188 		rf->u.rof_mmap.off = 0;
189 		rf->u.rof_mmap.map = NULL;
190 		return rf;
191 #ifdef USE_TEXTS
192 	    } else {
193 		set_error("fstat", ERROR_FROM_OS);
194 #endif /* USE_TEXTS */
195 	    }
196 	    close_or_warn(rf->u.rof_mmap.fd);
197 #ifdef USE_TEXTS
198 	} else {
199 	    set_error("open", ERROR_FROM_OS);
200 #endif /* USE_TEXTS */
201 	}
202     }
203 #endif /* !_WIN32 */
204     free(rf);
205     return NULL;
206 }
207 
rofopen_sysfd(int fd)208 struct rofile* rofopen_sysfd(int fd) {
209     struct rofile *rf = NULL;
210 #ifdef _WIN32
211     int old_fdmode;
212     if ((old_fdmode = setmode(fd, O_BINARY)) == -1) {
213 #ifdef USE_TEXTS
214 	set_error("fdmode", ERROR_FROM_OS);
215 #endif /* USE_TEXTS */
216 	return NULL;
217     }
218 #endif /* _WIN32 */
219 
220     if ((rf = (struct rofile*)malloc(sizeof(struct rofile))) != NULL) {
221 	rf->rof_type = SYS;
222 	rf->rof_blocksize = DEFAULT_BLOCK_SIZE;
223 	rf->rof_filesize = (uint64_t)-1; /* the "unknown" size */
224 	rf->u.rof_sys.fd = fd;
225 	rf->u.rof_sys.close_fd = 0;
226 #ifdef _WIN32
227 	rf->u.rof_sys.fdmode = old_fdmode;
228 #endif /* _WIN32 */
229 	rf->u.rof_sys.done = 0;
230 	if ((rf->u.rof_sys.buf = (char*)malloc(rf->rof_blocksize)) != NULL) {
231 	    return rf;
232 	} else {
233 #ifdef USE_TEXTS
234 	    set_error("malloc", ERROR_FROM_OS);
235 #endif /* USE_TEXTS */
236 	}
237 	free(rf);
238     } else {
239 #ifdef USE_TEXTS
240 	set_error("malloc", ERROR_FROM_OS);
241 #endif /* USE_TEXTS */
242     }
243 
244 #ifdef _WIN32
245     if (old_fdmode != O_BINARY)
246 	setmode(fd, old_fdmode);
247 #endif /* _WIN32 */
248     return NULL;
249 }
250 
/* Open a SYS stream on standard input; see rofopen_sysfd() for the result. */
struct rofile* rofopen_sysfd_stdin() {
    const int stdin_fd = STDIN_FILENO;
    return rofopen_sysfd(stdin_fd);
}
254 
rofopen_sysfile(const char * filename)255 struct rofile* rofopen_sysfile(const char* filename) {
256     int fd;
257     if ((fd = open(filename, O_RDONLY | O_BINARY)) >= 0) {
258 	struct rofile* rf;
259 	if ((rf = rofopen_sysfd(fd)) != NULL) {
260 	    struct stat st;
261 	    if (fstat(fd, &st) == 0)
262 		rf->rof_filesize = (uint64_t)st.st_size;
263 	    rf->u.rof_sys.close_fd = 1;
264 	    return rf;
265 	}
266 	close_or_warn(fd);
267     } else {
268 #ifdef USE_TEXTS
269 	set_error("open", ERROR_FROM_OS);
270 #endif /* USE_TEXTS */
271     }
272     return NULL;
273 }
274 
rofread_mem(const char ** next,unsigned * size,struct rofile * rf)275 _INLINE static int rofread_mem(const char** next, unsigned* size,
276 	struct rofile* rf) {
277     if (rf->u.rof_mem.left == 0)
278 	return 0;
279     if (rf->u.rof_mem.left >= rf->rof_blocksize)
280 	*size = rf->rof_blocksize;
281     else
282 	*size = rf->u.rof_mem.left;
283     *next = rf->u.rof_mem.cur;
284     rf->u.rof_mem.cur += *size;
285     rf->u.rof_mem.left -= *size;
286     return 1;
287 }
288 
rofread_mmap(const char ** next,unsigned * size,struct rofile * rf)289 _INLINE static int rofread_mmap(const char** next, unsigned* size,
290 	struct rofile* rf) {
291 #ifdef _WIN32
292     unsigned mapped_size;
293 #endif /* _WIN32 */
294     if (rf->u.rof_mmap.left == 0)
295 	return 0;
296 #ifdef _WIN32
297     if (rf->u.rof_mmap.view)
298 	UnmapViewOfFile(rf->u.rof_mmap.view);
299     mapped_size = rf->u.rof_mmap.left < (uint64_t)rf->rof_blocksize
300 		? (unsigned)rf->u.rof_mmap.left : rf->rof_blocksize;
301     rf->u.rof_mmap.view = MapViewOfFile(rf->u.rof_mmap.map, FILE_MAP_READ,
302 	    rf->u.rof_mmap.off >> 32, (DWORD)rf->u.rof_mmap.off, mapped_size);
303     if (rf->u.rof_mmap.view == NULL) {
304 #ifdef USE_TEXTS
305 	set_error("MapViewOfFile", ERROR_FROM_OS);
306 #endif /* USE_TEXTS */
307 	return -1;
308     }
309     *next = rf->u.rof_mmap.view;
310     rf->u.rof_mmap.off += mapped_size;
311     rf->u.rof_mmap.left -= mapped_size;
312     *size = mapped_size;
313 #else /* !_WIN32 */
314     if (rf->u.rof_mmap.map)
315 	munmap(rf->u.rof_mmap.map, rf->u.rof_mmap.last);
316     rf->u.rof_mmap.last = rf->u.rof_mmap.left < (uint64_t)rf->rof_blocksize
317 	    ? (unsigned)rf->u.rof_mmap.left : rf->rof_blocksize;
318     rf->u.rof_mmap.map = mmap(0, rf->u.rof_mmap.last, PROT_READ, MAP_SHARED,
319 	    rf->u.rof_mmap.fd, (off_t)rf->u.rof_mmap.off);
320     if (rf->u.rof_mmap.map == MAP_FAILED) {
321 #ifdef USE_TEXTS
322 	set_error("mmap", ERROR_FROM_OS);
323 #endif /* USE_TEXTS */
324 	rf->u.rof_mmap.map = NULL;
325 	return -1;
326     }
327 #ifdef _BSD_SOURCE
328     madvise(rf->u.rof_mmap.map, rf->u.rof_mmap.last,
329 	    MADV_SEQUENTIAL | MADV_WILLNEED);
330 #endif /* _BSD_SOURCE */
331     *next = (const char*)rf->u.rof_mmap.map;
332     rf->u.rof_mmap.off += rf->u.rof_mmap.last;
333     rf->u.rof_mmap.left -= rf->u.rof_mmap.last;
334     *size = rf->u.rof_mmap.last;
335 #endif /* !_WIN32 */
336     return 1;
337 }
338 
rofread_sys(const char ** next,unsigned * size,struct rofile * rf)339 _INLINE static int rofread_sys(const char** next, unsigned* size,
340 	struct rofile* rf) {
341     unsigned len = 0;
342     if (rf->u.rof_sys.done)
343 	return 0;
344     *next = rf->u.rof_sys.buf;
345     do {
346 	int ret;
347 	if ((ret = (int)read(rf->u.rof_sys.fd, rf->u.rof_sys.buf + len,
348 		rf->rof_blocksize - len)) < 0) {
349 #ifdef _WIN32
350 	    /* MSDN says:
351 	     * _read returns the number of bytes read, which may be less than
352 	     * count if there are fewer than count bytes left in the file or if
353 	     * the file was opened in text mode [snip] (nothing about EINTR) */
354 #else /* !_WIN32 */
355 	    /* man 2 read says:
356 	     * It is not an error if this number is smaller than the number of
357 	     * bytes requested; this may happen for example because fewer bytes
358 	     * are actually available right now (maybe because we were close to
359 	     * end-of-file, or because we are reading from a pipe, or from a
360 	     * terminal), or because read() was interrupted by a signal. */
361 	    if (errno == EINTR)
362 		continue;
363 #endif /* !_WIN32 */
364 #ifdef USE_TEXTS
365 	    set_error("read", ERROR_FROM_OS);
366 #endif /* USE_TEXTS */
367 	    return -1;
368 	} else if (ret == 0) {
369 	    rf->u.rof_sys.done = 1;
370 	    if (len == 0)
371 		return 0;
372 	    break;
373 	} else {
374 	    len += ret;
375 	}
376     } while (len < rf->rof_blocksize);
377     *size = len;
378     return 1;
379 }
380 
rofclose_mem(struct rofile * rf)381 _INLINE static void rofclose_mem(struct rofile* rf) {
382     free(rf->u.rof_mem.buf);
383     free(rf);
384 }
385 
rofclose_mmap(struct rofile * rf)386 _INLINE static void rofclose_mmap(struct rofile* rf) {
387 #ifdef _WIN32
388     if (rf->u.rof_mmap.view)
389 	UnmapViewOfFile(rf->u.rof_mmap.view);
390     if (rf->u.rof_mmap.map)
391 	CloseHandle(rf->u.rof_mmap.map);
392     CloseHandle(rf->u.rof_mmap.fd);
393 #else /* !_WIN32 */
394     if (rf->u.rof_mmap.map)
395 	munmap(rf->u.rof_mmap.map, rf->u.rof_mmap.last);
396     close_or_warn(rf->u.rof_mmap.fd);
397 #endif /* !_WIN32 */
398     free(rf);
399 }
400 
rofclose_sys(struct rofile * rf)401 _INLINE static void rofclose_sys(struct rofile* rf) {
402     free(rf->u.rof_sys.buf);
403     if (rf->u.rof_sys.close_fd)
404 	close_or_warn(rf->u.rof_sys.fd);
405 #ifdef _WIN32
406     else if (rf->u.rof_sys.fdmode != O_BINARY)
407 	/* Ignore return value.. we can't do anything about it anyway */
408 	setmode(rf->u.rof_sys.fd, rf->u.rof_sys.fdmode);
409 #endif /* _WIN32 */
410     free(rf);
411 }
412 
rofinfo(unsigned * blocksize,uint64_t * filesize,struct rofile * stream)413 void rofinfo(unsigned* blocksize, uint64_t* filesize, struct rofile* stream) {
414     *blocksize = stream->rof_blocksize;
415     *filesize = stream->rof_filesize;
416 }
417 
rofread(const char ** next,unsigned * size,struct rofile * stream)418 int rofread(const char** next, unsigned* size, struct rofile* stream) {
419     /* I assume sysread will be the most used case, check it first */
420     if (stream->rof_type == SYS)
421 	return rofread_sys(next, size, stream);
422     else if (stream->rof_type == MMAP)
423 	return rofread_mmap(next, size, stream);
424     else if (stream->rof_type == MEM)
425 	return rofread_mem(next, size, stream);
426     return -1;
427 }
428 
rofclose(struct rofile * stream)429 void rofclose(struct rofile* stream) {
430     if (stream->rof_type == MEM)
431 	rofclose_mem(stream);
432     else if (stream->rof_type == MMAP)
433 	rofclose_mmap(stream);
434     else if (stream->rof_type == SYS)
435 	rofclose_sys(stream);
436 }
437 
/*
 * Read everything that is left in `stream` into one malloc'ed buffer.
 *
 * stream  stream to drain; NULL is accepted and yields NULL.  The stream is
 *         not closed.
 * length  receives the number of bytes stored in the returned buffer; it is
 *         only written on success.
 *
 * Returns the buffer, which the caller releases with free(), or NULL when
 * the stream is NULL, reading fails, memory runs out, or the data does not
 * fit the unsigned range of *length (errno is set to EFBIG in that case).
 */
char* rof_readall(struct rofile* stream, unsigned* length) {
    int ret;
    const char* next;
    char* buf;
    size_t memsize = 4096, written = 0;
    unsigned blocksize, readsize;
    uint64_t filesize;

    if (stream == NULL)
	return NULL;

    rofinfo(&blocksize, &filesize, stream);
    if (filesize != (uint64_t)-1) {
	/* A size that *length cannot express can never be returned
	 * correctly; refuse up front rather than allocate and read first. */
	if (filesize > (uint64_t)UINT_MAX) {
	    errno = EFBIG;
#ifdef USE_TEXTS
	    set_error("rof_readall", ERROR_FROM_OS);
#endif /* USE_TEXTS */
	    return NULL;
	}
	memsize = (size_t)filesize;
	/* Special files may return 0 but contain more data */
	if (memsize == 0)
	    memsize = 8;
    }

    if ((buf = (char*)malloc(memsize)) == NULL) {
#ifdef USE_TEXTS
	set_error("malloc", ERROR_FROM_OS);
#endif /* USE_TEXTS */
	return NULL;
    }

    while ((ret = rofread(&next, &readsize, stream)) > 0) {
	/* The running total has to stay representable in *length, also
	 * when the stream turns out larger than its recorded size. */
	if ((size_t)readsize > (size_t)UINT_MAX - written) {
	    errno = EFBIG;
#ifdef USE_TEXTS
	    set_error("rof_readall", ERROR_FROM_OS);
#endif /* USE_TEXTS */
	    free(buf);
	    return NULL;
	}

	/* Free space is tracked in size_t (memsize - written) so that it
	 * cannot be truncated; grow by doubling until the block fits. */
	while ((size_t)readsize > memsize - written) {
	    char* newbuf = NULL;
	    /* Only double while the result cannot wrap and stays within
	     * the positive range of a signed size. */
	    if (memsize <= ((size_t)-1 / 2) / 2) {
		memsize *= 2;
		newbuf = (char*)realloc(buf, memsize);
	    } else {
		errno = ENOMEM;
	    }
	    if (newbuf == NULL) {
#ifdef USE_TEXTS
		set_error("realloc", ERROR_FROM_OS);
#endif /* USE_TEXTS */
		free(buf);
		return NULL;
	    }
	    buf = newbuf;
	}
	memcpy(buf + written, next, readsize);
	written += readsize;
    }
    if (ret < 0) {
	/* The failing backend has already recorded the error. */
	free(buf);
	return NULL;
    }

    *length = (unsigned)written; /* checked above to fit */
    return buf;
}
495