1 #include <stdio.h>
2 #include "lib/mlr_arch.h"
3 #include "lib/mlrutil.h"
4 #include "input/line_readers.h"
5
6 // ----------------------------------------------------------------
// Reads one line from fp, where a line ends at the single character
// 'delimiter' or at end of file.
//
// * fp: stream to read from, one character at a time via mlr_arch_getc.
// * delimiter: the line-terminating character; it is consumed but not stored.
// * pold_then_new_strlen: on input, a length hint used to size the initial
//   buffer; on output, the length of the returned line (excluding the
//   null-terminator and any stripped line ending), or 0 when NULL is returned.
// * do_auto_line_term: when true, a trailing '\r' is removed from the line and
//   context_set_autodetected_crlf(pctx) is called; when there is no trailing
//   '\r', context_set_autodetected_lf(pctx) is called instead.
// * pctx: passed to the context_set_autodetected_* calls; only used when
//   do_auto_line_term is true.
//
// Returns a null-terminated buffer which the caller must free, or NULL if end
// of file was hit before any character was read.
char* mlr_alloc_read_line_single_delimiter(
	FILE*      fp,
	int        delimiter,
	size_t*    pold_then_new_strlen,
	int        do_auto_line_term,
	context_t* pctx)
{
	size_t linecap = power_of_two_above(*pold_then_new_strlen + 1); // +1 for null-terminator
	// Deliberately not restrict-qualified: 'p' is repeatedly assigned from
	// 'line' within this same block, which is undefined behavior when both are
	// restrict pointers.
	char* line = mlr_malloc_or_die(linecap);
	char* p = line; // next write position; ends up on the null-terminator
	int reached_eof = FALSE;
	int c;
	size_t nread = 0;

	while (TRUE) {
		// Grow before every write so that both a data byte and the
		// terminating null always have room.
		size_t offset = p - line;
		if (offset >= linecap) {
			linecap = linecap << 1;
			line = mlr_realloc_or_die(line, linecap);
			p = line + offset;
		}
		c = mlr_arch_getc(fp);
		if (c == EOF) {
			*p = 0;
			reached_eof = TRUE;
			break;
		} else if (c == delimiter) {
			nread++;
			*p = 0;
			break;
		} else {
			nread++;
			*(p++) = c;
		}
	}

	if (do_auto_line_term) {
		// Test p > line first so that no pointer before the start of the
		// buffer is ever formed for an empty line.
		if (p > line && p[-1] == '\r') {
			*--p = 0;
			context_set_autodetected_crlf(pctx);
		} else {
			context_set_autodetected_lf(pctx);
		}
	}

	// linelen excludes line-ending characters.
	// nread includes line-ending characters.
	size_t linelen = (size_t)(p - line);
	if (nread == 0 && reached_eof) {
		// Nothing at all was read: signal end of input with NULL.
		free(line);
		line = NULL;
		linelen = 0;
	}
	*pold_then_new_strlen = linelen;

	return line;
}
66
67 // ----------------------------------------------------------------
mlr_alloc_read_line_multiple_delimiter(FILE * fp,char * delimiter,int delimiter_length,size_t * pold_then_new_strlen)68 char* mlr_alloc_read_line_multiple_delimiter(
69 FILE* fp,
70 char* delimiter,
71 int delimiter_length,
72 size_t* pold_then_new_strlen)
73 {
74 size_t linecap = power_of_two_above(*pold_then_new_strlen + 1); // +1 for null-terminator
75 char* line = mlr_malloc_or_die(linecap);
76 char* p = line; // points to null-terminator in (chomped) output string
77 char* q = line; // points to end of line in (non-chomped) data read from file
78 int reached_eof = FALSE;
79 int c;
80 int nread = 0;
81 int dlm1 = delimiter_length - 1;
82 char delimend = delimiter[dlm1];
83
84 while (TRUE) {
85 size_t offset = q - line;
86 if (offset >= linecap-1) {
87 linecap = linecap << 1;
88 line = mlr_realloc_or_die(line, linecap);
89 q = line + offset;
90 }
91 c = mlr_arch_getc(fp);
92 if (c == EOF) {
93 *q = 0;
94 reached_eof = TRUE;
95 p = q;
96 break;
97 } else if (c == delimend) {
98 // For efficiency, do a single-character test to see if we've seen
99 // the last character in the line-ending sequence. If we have, then
100 // strcmp back to see if we've seen the entire line-ending sequence.
101 //
102 // This function exists separately from in order to avoid the performance
103 // penalty of this strcmp.
104 nread++;
105 *(q++) = c;
106 p = q - delimiter_length;
107 if (q - line >= delimiter_length && memcmp(p, delimiter, delimiter_length) == 0) {
108 *p = 0;
109 break;
110 }
111 } else {
112 nread++;
113 *(q++) = c;
114 }
115 }
116
117 // linelen excludes line-ending characters.
118 // nread includes line-ending characters.
119 int linelen = p - line;
120 if (nread == 0 && reached_eof) {
121 free(line);
122 line = NULL;
123 linelen = 0;
124 }
125 *pold_then_new_strlen = linelen;
126
127 return line;
128 }
129
130 // ----------------------------------------------------------------
// Convenience entry point for the single-character-delimiter, comment-aware
// line reader when the caller does not need a count of skipped comment lines.
// All arguments are forwarded unchanged to the _aux variant, with NULL given
// for its skipped-line counter. The return value is whatever the _aux variant
// returns.
char* mlr_alloc_read_line_single_delimiter_stripping_comments(
	FILE*              fp,
	int                delimiter,
	size_t*            pold_then_new_strlen,
	int                do_auto_line_term,
	comment_handling_t comment_handling,
	char*              comment_string,
	context_t*         pctx)
{
	int* pnum_lines_comment_skipped = NULL; // no tally requested
	return mlr_alloc_read_line_single_delimiter_stripping_comments_aux(
		fp, delimiter, pold_then_new_strlen, do_auto_line_term,
		comment_handling, comment_string, pnum_lines_comment_skipped, pctx);
}
150
// Reads lines with mlr_alloc_read_line_single_delimiter until one is found
// which does not start with comment_string, and returns that line (or NULL
// once the underlying reader returns NULL).
//
// Each line that starts with comment_string is freed rather than returned.
// If comment_handling is PASS_COMMENTS, the comment line is first written to
// stdout, followed by pctx->auto_line_term when do_auto_line_term is true or
// by the delimiter character otherwise, and stdout is flushed.
//
// If pnum_lines_comment_skipped is non-NULL it is zeroed on entry and
// incremented once per comment line consumed by this call.
char* mlr_alloc_read_line_single_delimiter_stripping_comments_aux(
	FILE*              fp,
	int                delimiter,
	size_t*            pold_then_new_strlen,
	int                do_auto_line_term,
	comment_handling_t comment_handling,
	char*              comment_string,
	int*               pnum_lines_comment_skipped,
	context_t*         pctx)
{
	int want_tally = (pnum_lines_comment_skipped != NULL);
	if (want_tally)
		*pnum_lines_comment_skipped = 0;

	for (;;) {
		char* candidate = mlr_alloc_read_line_single_delimiter(
			fp, delimiter, pold_then_new_strlen, do_auto_line_term, pctx);

		// End of input and ordinary data lines both go straight back to the
		// caller; only comment lines keep the loop going.
		if (candidate == NULL || !string_starts_with(candidate, comment_string))
			return candidate;

		if (want_tally)
			++*pnum_lines_comment_skipped;

		if (comment_handling == PASS_COMMENTS) {
			fputs(candidate, stdout);
			if (!do_auto_line_term)
				fputc(delimiter, stdout);
			else
				fputs(pctx->auto_line_term, stdout);
			fflush(stdout);
		}

		free(candidate);
	}
}
186
187 // ----------------------------------------------------------------
// Convenience entry point for the multi-character-delimiter, comment-aware
// line reader when the caller does not need a count of skipped comment lines.
// All arguments are forwarded unchanged to the _aux variant, with NULL given
// for its skipped-line counter. The return value is whatever the _aux variant
// returns.
char* mlr_alloc_read_line_multiple_delimiter_stripping_comments(
	FILE*              fp,
	char*              delimiter,
	int                delimiter_length,
	size_t*            pold_then_new_strlen,
	comment_handling_t comment_handling,
	char*              comment_string)
{
	int* pnum_lines_comment_skipped = NULL; // no tally requested
	return mlr_alloc_read_line_multiple_delimiter_stripping_comments_aux(
		fp, delimiter, delimiter_length, pold_then_new_strlen,
		comment_handling, comment_string, pnum_lines_comment_skipped);
}
205
206 // ----------------------------------------------------------------
// Reads lines with mlr_alloc_read_line_multiple_delimiter until one is found
// which does not start with comment_string, and returns that line (or NULL
// once the underlying reader returns NULL).
//
// Each line that starts with comment_string is freed rather than returned.
// If comment_handling is PASS_COMMENTS, the comment line is first written to
// stdout followed by the delimiter string, and stdout is flushed.
//
// If pnum_lines_comment_skipped is non-NULL it is zeroed on entry and
// incremented once per comment line consumed by this call.
char* mlr_alloc_read_line_multiple_delimiter_stripping_comments_aux(
	FILE*              fp,
	char*              delimiter,
	int                delimiter_length,
	size_t*            pold_then_new_strlen,
	comment_handling_t comment_handling,
	char*              comment_string,
	int*               pnum_lines_comment_skipped)
{
	int want_tally = (pnum_lines_comment_skipped != NULL);
	if (want_tally)
		*pnum_lines_comment_skipped = 0;

	for (;;) {
		char* candidate = mlr_alloc_read_line_multiple_delimiter(
			fp, delimiter, delimiter_length, pold_then_new_strlen);

		// End of input and ordinary data lines both go straight back to the
		// caller; only comment lines keep the loop going.
		if (candidate == NULL || !string_starts_with(candidate, comment_string))
			return candidate;

		if (want_tally)
			++*pnum_lines_comment_skipped;

		if (comment_handling == PASS_COMMENTS) {
			fputs(candidate, stdout);
			fputs(delimiter, stdout);
			fflush(stdout);
		}

		free(candidate);
	}
}
237