1 /*-
2  * Copyright (c) 2008 Tim Kientzle
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18  * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 #include "lafe_platform.h"
28 __FBSDID("$FreeBSD$");
29 
30 #include <errno.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34 
35 #include "err.h"
36 #include "line_reader.h"
37 
38 #if defined(_WIN32) && !defined(__CYGWIN__) && !defined(__BORLANDC__)
39 #define strdup _strdup
40 #endif
41 
/*
 * Line reader: hands back the contents of a file one line at a time.
 *
 * When nullSeparator is nonzero, lines end at a zero byte; otherwise
 * they end at a carriage return or line feed.
 *
 * The backing buffer is grown on demand, so lines may be arbitrarily
 * long.
 */
struct lafe_line_reader {
	/* Input stream; set to NULL once end-of-file has been reached. */
	FILE *f;
	/* Start of the heap-allocated read buffer. */
	char *buff;
	/* One past the last byte of valid data in buff. */
	char *buff_end;
	/* First byte of the line currently being assembled. */
	char *line_start;
	/* Scan position while searching for the line terminator. */
	char *line_end;
	/* Not referenced by the reader functions visible in this file. */
	char *p;
	/* Private copy of the file name, used in error messages. */
	char *pathname;
	/* Allocated size of buff, in bytes. */
	size_t buff_length;
	/* Nonzero: lines are separated by null, not CR/CRLF/etc. */
	int nullSeparator;
	/* Not referenced by the reader functions visible in this file. */
	int ret;
};
57 
58 struct lafe_line_reader *
59 lafe_line_reader(const char *pathname, int nullSeparator)
60 {
61 	struct lafe_line_reader *lr;
62 
63 	lr = calloc(1, sizeof(*lr));
64 	if (lr == NULL)
65 		lafe_errc(1, ENOMEM, "Can't open %s", pathname);
66 
67 	lr->nullSeparator = nullSeparator;
68 	lr->pathname = strdup(pathname);
69 
70 	if (strcmp(pathname, "-") == 0)
71 		lr->f = stdin;
72 	else
73 		lr->f = fopen(pathname, "r");
74 	if (lr->f == NULL)
75 		lafe_errc(1, errno, "Couldn't open %s", pathname);
76 	lr->buff_length = 8192;
77 	lr->buff = malloc(lr->buff_length);
78 	if (lr->buff == NULL)
79 		lafe_errc(1, ENOMEM, "Can't read %s", pathname);
80 	lr->line_start = lr->line_end = lr->buff_end = lr->buff;
81 
82 	return (lr);
83 }
84 
85 const char *
86 lafe_line_reader_next(struct lafe_line_reader *lr)
87 {
88 	size_t bytes_wanted, bytes_read, new_buff_size;
89 	char *line_start, *p;
90 
91 	for (;;) {
92 		/* If there's a line in the buffer, return it immediately. */
93 		while (lr->line_end < lr->buff_end) {
94 			if (lr->nullSeparator) {
95 				if (*lr->line_end == '\0') {
96 					line_start = lr->line_start;
97 					lr->line_start = lr->line_end + 1;
98 					lr->line_end = lr->line_start;
99 					return (line_start);
100 				}
101 			} else if (*lr->line_end == '\x0a' || *lr->line_end == '\x0d') {
102 				*lr->line_end = '\0';
103 				line_start = lr->line_start;
104 				lr->line_start = lr->line_end + 1;
105 				lr->line_end = lr->line_start;
106 				if (line_start[0] != '\0')
107 					return (line_start);
108 			}
109 			lr->line_end++;
110 		}
111 
112 		/* If we're at end-of-file, process the final data. */
113 		if (lr->f == NULL) {
114 			/* If there's more text, return one last line. */
115 			if (lr->line_end > lr->line_start) {
116 				*lr->line_end = '\0';
117 				line_start = lr->line_start;
118 				lr->line_start = lr->line_end + 1;
119 				lr->line_end = lr->line_start;
120 				return (line_start);
121 			}
122 			/* Otherwise, we're done. */
123 			return (NULL);
124 		}
125 
126 		/* Buffer only has part of a line. */
127 		if (lr->line_start > lr->buff) {
128 			/* Move a leftover fractional line to the beginning. */
129 			memmove(lr->buff, lr->line_start,
130 			    lr->buff_end - lr->line_start);
131 			lr->buff_end -= lr->line_start - lr->buff;
132 			lr->line_end -= lr->line_start - lr->buff;
133 			lr->line_start = lr->buff;
134 		} else {
135 			/* Line is too big; enlarge the buffer. */
136 			new_buff_size = lr->buff_length * 2;
137 			if (new_buff_size <= lr->buff_length)
138 				lafe_errc(1, ENOMEM,
139 				    "Line too long in %s", lr->pathname);
140 			lr->buff_length = new_buff_size;
141 			p = realloc(lr->buff, new_buff_size);
142 			if (p == NULL)
143 				lafe_errc(1, ENOMEM,
144 				    "Line too long in %s", lr->pathname);
145 			lr->buff_end = p + (lr->buff_end - lr->buff);
146 			lr->line_end = p + (lr->line_end - lr->buff);
147 			lr->line_start = lr->buff = p;
148 		}
149 
150 		/* Get some more data into the buffer. */
151 		bytes_wanted = lr->buff + lr->buff_length - lr->buff_end;
152 		bytes_read = fread(lr->buff_end, 1, bytes_wanted, lr->f);
153 		lr->buff_end += bytes_read;
154 
155 		if (ferror(lr->f))
156 			lafe_errc(1, errno, "Can't read %s", lr->pathname);
157 		if (feof(lr->f)) {
158 			if (lr->f != stdin)
159 				fclose(lr->f);
160 			lr->f = NULL;
161 		}
162 	}
163 }
164 
165 void
166 lafe_line_reader_free(struct lafe_line_reader *lr)
167 {
168 	free(lr->buff);
169 	free(lr->pathname);
170 	free(lr);
171 }
172