1 // ================================================================
2 // Note: there are multiple process methods with a lot of code duplication.
3 // This is intentional. Much of Miller's measured processing time is in the
4 // lrec-reader process methods. This is code which needs to execute on every
5 // byte of input and even moving a single runtime if-statement into a
6 // function-pointer assignment at alloc time can have noticeable effects on
7 // performance (5-10% in some cases).
8 // ================================================================
9
10 #include <stdio.h>
11 #include <stdlib.h>
12 #include "cli/comment_handling.h"
13 #include "lib/mlrutil.h"
14 #include "input/file_reader_stdio.h"
15 #include "input/line_readers.h"
16 #include "input/lrec_readers.h"
17
18 typedef struct _lrec_reader_stdio_xtab_state_t {
19 char* ifs;
20 char* ips;
21 int ifslen;
22 int ipslen;
23 int allow_repeat_ips;
24 int do_auto_line_term;
25 int at_eof;
26 comment_handling_t comment_handling;
27 char* comment_string;
28 size_t line_length;
29 } lrec_reader_stdio_xtab_state_t;
30
31 static void lrec_reader_stdio_xtab_free(lrec_reader_t* preader);
32 static void lrec_reader_stdio_xtab_sof(void* pvstate, void* pvhandle);
33 static lrec_t* lrec_reader_stdio_xtab_process(void* pvstate, void* pvhandle, context_t* pctx);
34
35 // ----------------------------------------------------------------
lrec_reader_stdio_xtab_alloc(char * ifs,char * ips,int allow_repeat_ips,comment_handling_t comment_handling,char * comment_string)36 lrec_reader_t* lrec_reader_stdio_xtab_alloc(char* ifs, char* ips, int allow_repeat_ips,
37 comment_handling_t comment_handling, char* comment_string)
38 {
39 lrec_reader_t* plrec_reader = mlr_malloc_or_die(sizeof(lrec_reader_t));
40
41 lrec_reader_stdio_xtab_state_t* pstate = mlr_malloc_or_die(sizeof(lrec_reader_stdio_xtab_state_t));
42 pstate->ifs = ifs;
43 pstate->ips = ips;
44 pstate->ifslen = strlen(ifs);
45 pstate->ipslen = strlen(ips);
46 pstate->allow_repeat_ips = allow_repeat_ips;
47 pstate->do_auto_line_term = FALSE;
48 pstate->at_eof = FALSE;
49 pstate->comment_handling = comment_handling;
50 pstate->comment_string = comment_string;
51 // This is used to track nominal line length over the file read. Bootstrap with a default length.
52 pstate->line_length = MLR_ALLOC_READ_LINE_INITIAL_SIZE;
53
54 if (streq(ifs, "auto")) {
55 pstate->do_auto_line_term = TRUE;
56 pstate->ifs = "\n";
57 pstate->ifslen = 1;
58 }
59
60 plrec_reader->pvstate = (void*)pstate;
61 plrec_reader->popen_func = file_reader_stdio_vopen;
62 plrec_reader->pclose_func = file_reader_stdio_vclose;
63 plrec_reader->pprocess_func = lrec_reader_stdio_xtab_process;
64 plrec_reader->psof_func = lrec_reader_stdio_xtab_sof;
65 plrec_reader->pfree_func = lrec_reader_stdio_xtab_free;
66
67 return plrec_reader;
68 }
69
// Releases the reader and its private state. The separator and comment
// strings referenced by the state are not freed here.
static void lrec_reader_stdio_xtab_free(lrec_reader_t* preader) {
    lrec_reader_stdio_xtab_state_t* pstate = preader->pvstate;
    free(pstate);
    free(preader);
}
74
lrec_reader_stdio_xtab_sof(void * pvstate,void * pvhandle)75 static void lrec_reader_stdio_xtab_sof(void* pvstate, void* pvhandle) {
76 lrec_reader_stdio_xtab_state_t* pstate = pvstate;
77 pstate->at_eof = FALSE;
78 }
79
80 // ----------------------------------------------------------------
lrec_reader_stdio_xtab_process(void * pvstate,void * pvhandle,context_t * pctx)81 static lrec_t* lrec_reader_stdio_xtab_process(void* pvstate, void* pvhandle, context_t* pctx) {
82 FILE* input_stream = pvhandle;
83 lrec_reader_stdio_xtab_state_t* pstate = pvstate;
84
85 if (pstate->at_eof)
86 return NULL;
87
88 slls_t* pxtab_lines = slls_alloc();
89
90 while (TRUE) {
91 char* line = NULL;
92
93 if (pstate->comment_handling == COMMENTS_ARE_DATA) {
94 if (pstate->ifslen == 1)
95 line = mlr_alloc_read_line_single_delimiter(input_stream, pstate->ifs[0],
96 &pstate->line_length, pstate->do_auto_line_term, pctx);
97 else
98 line = mlr_alloc_read_line_multiple_delimiter(input_stream, pstate->ifs, pstate->ifslen,
99 &pstate->line_length);
100 } else {
101 if (pstate->ifslen == 1)
102 line = mlr_alloc_read_line_single_delimiter_stripping_comments(input_stream, pstate->ifs[0],
103 &pstate->line_length, pstate->do_auto_line_term,
104 pstate->comment_handling, pstate->comment_string, pctx);
105 else
106 line = mlr_alloc_read_line_multiple_delimiter_stripping_comments(input_stream,
107 pstate->ifs, pstate->ifslen, &pstate->line_length,
108 pstate->comment_handling, pstate->comment_string);
109 }
110
111 if (line == NULL) { // EOF
112 // EOF or blank line terminates the stanza.
113 pstate->at_eof = TRUE;
114 if (pxtab_lines->length == 0) {
115 slls_free(pxtab_lines);
116 return NULL;
117 } else {
118 return (pstate->ipslen == 1)
119 ? lrec_parse_stdio_xtab_single_ips(pxtab_lines, pstate->ips[0], pstate->allow_repeat_ips)
120 : lrec_parse_stdio_xtab_multi_ips(pxtab_lines, pstate->ips, pstate->ipslen,
121 pstate->allow_repeat_ips);
122 }
123
124 } else if (*line == '\0') {
125 free(line);
126 if (pxtab_lines->length > 0) {
127 return (pstate->ipslen == 1)
128 ? lrec_parse_stdio_xtab_single_ips(pxtab_lines, pstate->ips[0], pstate->allow_repeat_ips)
129 : lrec_parse_stdio_xtab_multi_ips(pxtab_lines, pstate->ips, pstate->ipslen,
130 pstate->allow_repeat_ips);
131 }
132
133 } else {
134 slls_append_with_free(pxtab_lines, line);
135 }
136 }
137 }
138
139 // ----------------------------------------------------------------
lrec_parse_stdio_xtab_single_ips(slls_t * pxtab_lines,char ips,int allow_repeat_ips)140 lrec_t* lrec_parse_stdio_xtab_single_ips(slls_t* pxtab_lines, char ips, int allow_repeat_ips) {
141 lrec_t* prec = lrec_xtab_alloc(pxtab_lines);
142
143 for (sllse_t* pe = pxtab_lines->phead; pe != NULL; pe = pe->pnext) {
144 char* line = pe->value;
145 char* p = line;
146 char* key = p;
147
148 while (*p != 0 && *p != ips)
149 p++;
150 if (*p == 0) {
151 lrec_put(prec, key, "", NO_FREE);
152 } else {
153 while (*p != 0 && *p == ips) {
154 *p = 0;
155 p++;
156 }
157 lrec_put(prec, key, p, NO_FREE);
158 }
159 }
160
161 return prec;
162 }
163
lrec_parse_stdio_xtab_multi_ips(slls_t * pxtab_lines,char * ips,int ipslen,int allow_repeat_ips)164 lrec_t* lrec_parse_stdio_xtab_multi_ips(slls_t* pxtab_lines, char* ips, int ipslen, int allow_repeat_ips) {
165 lrec_t* prec = lrec_xtab_alloc(pxtab_lines);
166
167 for (sllse_t* pe = pxtab_lines->phead; pe != NULL; pe = pe->pnext) {
168 char* line = pe->value;
169 char* p = line;
170 char* key = p;
171
172 while (*p != 0 && !streqn(p, ips, ipslen))
173 p++; // Advance by only 1 in case of subsequent match
174 if (*p == 0) {
175 lrec_put(prec, key, "", NO_FREE);
176 } else {
177 while (*p != 0 && streqn(p, ips, ipslen)) {
178 *p = 0;
179 p += ipslen;
180 }
181 lrec_put(prec, key, p, NO_FREE);
182 }
183 }
184
185 return prec;
186 }
187