1 #include <stdio.h>
2 #include "lib/mlr_arch.h"
3 #include "lib/mlrutil.h"
4 #include "input/line_readers.h"
5 
6 // ----------------------------------------------------------------
// Reads one line from fp, terminated by the single character 'delimiter' or by
// end of file, into a freshly allocated null-terminated buffer.
//
// * fp: stream read one character at a time via mlr_arch_getc.
// * delimiter: line-ending character. It is consumed but not stored.
// * pold_then_new_strlen: on input, a length hint used to size the initial
//   allocation. On output, the length of the returned string excluding
//   line-ending characters (0 when NULL is returned).
// * do_auto_line_term: if true, one trailing '\r' is stripped and pctx is
//   notified via context_set_autodetected_crlf; otherwise (no trailing '\r')
//   context_set_autodetected_lf is called.
// * pctx: only touched when do_auto_line_term is true.
//
// Returns NULL if end of file was reached before any character was read;
// otherwise returns the buffer, which this function does not retain.
char* mlr_alloc_read_line_single_delimiter(
	FILE*      fp,
	int        delimiter,
	size_t*    pold_then_new_strlen,
	int        do_auto_line_term,
	context_t* pctx)
{
	size_t linecap = power_of_two_above(*pold_then_new_strlen + 1); // +1 for null-terminator
	// These are deliberately not restrict-qualified: p is assigned from line in
	// the same block, which is undefined for two restrict pointers.
	char* line = mlr_malloc_or_die(linecap);
	char* p = line; // next write position
	int reached_eof = FALSE;
	size_t nread = 0;

	while (TRUE) {
		// Grow before writing so that the store below, whether a data
		// character or the null-terminator, is always in bounds.
		size_t offset = p - line;
		if (offset >= linecap) {
			linecap = linecap << 1;
			line = mlr_realloc_or_die(line, linecap);
			p = line + offset;
		}
		int c = mlr_arch_getc(fp);
		if (c == EOF) {
			*p = 0;
			reached_eof = TRUE;
			break;
		} else if (c == delimiter) {
			nread++;
			*p = 0;
			break;
		} else {
			nread++;
			*(p++) = c;
		}
	}

	if (do_auto_line_term) {
		// Check p > line first so that no pointer before the start of the
		// buffer is ever formed when the line is empty.
		if (p > line && p[-1] == '\r') {
			*(--p) = 0;
			context_set_autodetected_crlf(pctx);
		} else {
			context_set_autodetected_lf(pctx);
		}
	}

	// linelen excludes line-ending characters.
	// nread   includes line-ending characters.
	size_t linelen = p - line;
	if (nread == 0 && reached_eof) {
		// Nothing at all was read: signal end of input with NULL.
		free(line);
		line = NULL;
		linelen = 0;
	}
	*pold_then_new_strlen = linelen;

	return line;
}
66 
67 // ----------------------------------------------------------------
mlr_alloc_read_line_multiple_delimiter(FILE * fp,char * delimiter,int delimiter_length,size_t * pold_then_new_strlen)68 char* mlr_alloc_read_line_multiple_delimiter(
69 	FILE*      fp,
70 	char*      delimiter,
71 	int        delimiter_length,
72 	size_t*    pold_then_new_strlen)
73 {
74 	size_t linecap = power_of_two_above(*pold_then_new_strlen + 1); // +1 for null-terminator
75 	char* line = mlr_malloc_or_die(linecap);
76 	char* p = line; // points to null-terminator in (chomped) output string
77 	char* q = line; // points to end of line in (non-chomped) data read from file
78 	int reached_eof = FALSE;
79 	int c;
80 	int nread = 0;
81 	int dlm1 = delimiter_length - 1;
82 	char delimend = delimiter[dlm1];
83 
84 	while (TRUE) {
85 		size_t offset = q - line;
86 		if (offset >= linecap-1) {
87 			linecap = linecap << 1;
88 			line = mlr_realloc_or_die(line, linecap);
89 			q = line + offset;
90 		}
91 		c = mlr_arch_getc(fp);
92 		if (c == EOF) {
93 			*q = 0;
94 			reached_eof = TRUE;
95 			p = q;
96 			break;
97 		} else if (c == delimend) {
98 			// For efficiency, do a single-character test to see if we've seen
99 			// the last character in the line-ending sequence. If we have, then
100 			// strcmp back to see if we've seen the entire line-ending sequence.
101 			//
102 			// This function exists separately from in order to avoid the performance
103 			// penalty of this strcmp.
104 			nread++;
105 			*(q++) = c;
106 			p = q - delimiter_length;
107 			if (q - line >= delimiter_length && memcmp(p, delimiter, delimiter_length) == 0) {
108 				*p = 0;
109 				break;
110 			}
111 		} else {
112 			nread++;
113 			*(q++) = c;
114 		}
115 	}
116 
117 	// linelen excludes line-ending characters.
118 	// nread   includes line-ending characters.
119 	int linelen = p - line;
120 	if (nread == 0 && reached_eof) {
121 		free(line);
122 		line = NULL;
123 		linelen = 0;
124 	}
125 	*pold_then_new_strlen = linelen;
126 
127 	return line;
128 }
129 
130 // ----------------------------------------------------------------
// Convenience front end to
// mlr_alloc_read_line_single_delimiter_stripping_comments_aux for callers that
// do not want the count of skipped comment lines: all arguments are forwarded
// unchanged and a null pointer is supplied for the counter.
char* mlr_alloc_read_line_single_delimiter_stripping_comments(
	FILE*      fp,
	int        delimiter,
	size_t*    pold_then_new_strlen,
	int        do_auto_line_term,
	comment_handling_t comment_handling,
	char*      comment_string,
	context_t* pctx)
{
	int* const no_skip_counter = NULL;

	char* line = mlr_alloc_read_line_single_delimiter_stripping_comments_aux(
		fp, delimiter, pold_then_new_strlen, do_auto_line_term,
		comment_handling, comment_string, no_skip_counter, pctx);

	return line;
}
150 
// Reads lines with mlr_alloc_read_line_single_delimiter until one is found
// that does not start with comment_string, and returns that line (or NULL once
// the underlying reader returns NULL).
//
// Each line starting with comment_string is freed and not returned. If
// comment_handling is PASS_COMMENTS, the line is first written to stdout,
// followed by pctx->auto_line_term when do_auto_line_term is true or by the
// delimiter character otherwise, and stdout is flushed.
//
// * pnum_lines_comment_skipped: may be NULL. If non-NULL it is zeroed on entry
//   and incremented once per comment line consumed by this call.
// * The remaining arguments are passed through to
//   mlr_alloc_read_line_single_delimiter.
char* mlr_alloc_read_line_single_delimiter_stripping_comments_aux(
	FILE*      fp,
	int        delimiter,
	size_t*    pold_then_new_strlen,
	int        do_auto_line_term,
	comment_handling_t comment_handling,
	char*      comment_string,
	int*       pnum_lines_comment_skipped,
	context_t* pctx)
{
	const int want_count = (pnum_lines_comment_skipped != NULL);
	if (want_count)
		*pnum_lines_comment_skipped = 0;

	for (;;) {
		char* candidate = mlr_alloc_read_line_single_delimiter(
			fp, delimiter, pold_then_new_strlen, do_auto_line_term, pctx);

		// End of input, or an ordinary (non-comment) line: hand it back as-is.
		if (candidate == NULL || !string_starts_with(candidate, comment_string))
			return candidate;

		// Comment line: count it, optionally echo it, then discard it.
		if (want_count)
			++*pnum_lines_comment_skipped;

		if (comment_handling == PASS_COMMENTS) {
			fputs(candidate, stdout);
			if (!do_auto_line_term) {
				fputc(delimiter, stdout);
			} else {
				fputs(pctx->auto_line_term, stdout);
			}
			fflush(stdout);
		}

		free(candidate);
	}
}
186 
187 // ----------------------------------------------------------------
// Convenience front end to
// mlr_alloc_read_line_multiple_delimiter_stripping_comments_aux for callers
// that do not want the count of skipped comment lines: all arguments are
// forwarded unchanged and a null pointer is supplied for the counter.
char* mlr_alloc_read_line_multiple_delimiter_stripping_comments(
	FILE*      fp,
	char*      delimiter,
	int        delimiter_length,
	size_t*    pold_then_new_strlen,
	comment_handling_t comment_handling,
	char*      comment_string)
{
	int* const no_skip_counter = NULL;

	char* line = mlr_alloc_read_line_multiple_delimiter_stripping_comments_aux(
		fp, delimiter, delimiter_length, pold_then_new_strlen,
		comment_handling, comment_string, no_skip_counter);

	return line;
}
205 
206 // ----------------------------------------------------------------
// Reads lines with mlr_alloc_read_line_multiple_delimiter until one is found
// that does not start with comment_string, and returns that line (or NULL once
// the underlying reader returns NULL).
//
// Each line starting with comment_string is freed and not returned. If
// comment_handling is PASS_COMMENTS, the line is first written to stdout
// followed by the delimiter string (written with fputs), and stdout is
// flushed.
//
// * pnum_lines_comment_skipped: may be NULL. If non-NULL it is zeroed on entry
//   and incremented once per comment line consumed by this call.
// * The remaining arguments are passed through to
//   mlr_alloc_read_line_multiple_delimiter.
char* mlr_alloc_read_line_multiple_delimiter_stripping_comments_aux(
	FILE*      fp,
	char*      delimiter,
	int        delimiter_length,
	size_t*    pold_then_new_strlen,
	comment_handling_t comment_handling,
	char*      comment_string,
	int*       pnum_lines_comment_skipped)
{
	const int want_count = (pnum_lines_comment_skipped != NULL);
	if (want_count)
		*pnum_lines_comment_skipped = 0;

	for (;;) {
		char* candidate = mlr_alloc_read_line_multiple_delimiter(
			fp, delimiter, delimiter_length, pold_then_new_strlen);

		// End of input, or an ordinary (non-comment) line: hand it back as-is.
		if (candidate == NULL || !string_starts_with(candidate, comment_string))
			return candidate;

		// Comment line: count it, optionally echo it, then discard it.
		if (want_count)
			++*pnum_lines_comment_skipped;

		if (comment_handling == PASS_COMMENTS) {
			fputs(candidate, stdout);
			fputs(delimiter, stdout);
			fflush(stdout);
		}

		free(candidate);
	}
}
237