1 // ================================================================
2 // Note: there are multiple process methods with a lot of code duplication.
3 // This is intentional. Much of Miller's measured processing time is in the
4 // lrec-reader process methods. This is code which needs to execute on every
5 // byte of input and even moving a single runtime if-statement into a
6 // function-pointer assignment at alloc time can have noticeable effects on
7 // performance (5-10% in some cases).
8 // ================================================================
9 
10 #include <stdio.h>
11 #include <stdlib.h>
12 #include "cli/comment_handling.h"
13 #include "lib/mlrutil.h"
14 #include "input/file_reader_stdio.h"
15 #include "input/line_readers.h"
16 #include "input/lrec_readers.h"
17 
18 typedef struct _lrec_reader_stdio_xtab_state_t {
19 	char*  ifs;
20 	char*  ips;
21 	int    ifslen;
22 	int    ipslen;
23 	int    allow_repeat_ips;
24 	int    do_auto_line_term;
25 	int    at_eof;
26 	comment_handling_t comment_handling;
27 	char*  comment_string;
28 	size_t line_length;
29 } lrec_reader_stdio_xtab_state_t;
30 
31 static void    lrec_reader_stdio_xtab_free(lrec_reader_t* preader);
32 static void    lrec_reader_stdio_xtab_sof(void* pvstate, void* pvhandle);
33 static lrec_t* lrec_reader_stdio_xtab_process(void* pvstate, void* pvhandle, context_t* pctx);
34 
35 // ----------------------------------------------------------------
lrec_reader_stdio_xtab_alloc(char * ifs,char * ips,int allow_repeat_ips,comment_handling_t comment_handling,char * comment_string)36 lrec_reader_t* lrec_reader_stdio_xtab_alloc(char* ifs, char* ips, int allow_repeat_ips,
37 	comment_handling_t comment_handling, char* comment_string)
38 {
39 	lrec_reader_t* plrec_reader = mlr_malloc_or_die(sizeof(lrec_reader_t));
40 
41 	lrec_reader_stdio_xtab_state_t* pstate = mlr_malloc_or_die(sizeof(lrec_reader_stdio_xtab_state_t));
42 	pstate->ifs               = ifs;
43 	pstate->ips               = ips;
44 	pstate->ifslen            = strlen(ifs);
45 	pstate->ipslen            = strlen(ips);
46 	pstate->allow_repeat_ips  = allow_repeat_ips;
47 	pstate->do_auto_line_term = FALSE;
48 	pstate->at_eof            = FALSE;
49 	pstate->comment_handling  = comment_handling;
50 	pstate->comment_string    = comment_string;
51 	// This is used to track nominal line length over the file read. Bootstrap with a default length.
52 	pstate->line_length       = MLR_ALLOC_READ_LINE_INITIAL_SIZE;
53 
54 	if (streq(ifs, "auto")) {
55 		pstate->do_auto_line_term = TRUE;
56 		pstate->ifs = "\n";
57 		pstate->ifslen = 1;
58 	}
59 
60 	plrec_reader->pvstate       = (void*)pstate;
61 	plrec_reader->popen_func    = file_reader_stdio_vopen;
62 	plrec_reader->pclose_func   = file_reader_stdio_vclose;
63 	plrec_reader->pprocess_func = lrec_reader_stdio_xtab_process;
64 	plrec_reader->psof_func     = lrec_reader_stdio_xtab_sof;
65 	plrec_reader->pfree_func    = lrec_reader_stdio_xtab_free;
66 
67 	return plrec_reader;
68 }
69 
// Releases the reader's private state block and then the reader itself.
// Nothing the state points at (separator and comment strings) is freed here.
static void lrec_reader_stdio_xtab_free(lrec_reader_t* preader) {
	void* pvstate = preader->pvstate;
	free(pvstate);
	free(preader);
}
74 
lrec_reader_stdio_xtab_sof(void * pvstate,void * pvhandle)75 static void lrec_reader_stdio_xtab_sof(void* pvstate, void* pvhandle) {
76 	lrec_reader_stdio_xtab_state_t* pstate = pvstate;
77 	pstate->at_eof = FALSE;
78 }
79 
80 // ----------------------------------------------------------------
lrec_reader_stdio_xtab_process(void * pvstate,void * pvhandle,context_t * pctx)81 static lrec_t* lrec_reader_stdio_xtab_process(void* pvstate, void* pvhandle, context_t* pctx) {
82 	FILE* input_stream = pvhandle;
83 	lrec_reader_stdio_xtab_state_t* pstate = pvstate;
84 
85 	if (pstate->at_eof)
86 		return NULL;
87 
88 	slls_t* pxtab_lines = slls_alloc();
89 
90 	while (TRUE) {
91 		char* line = NULL;
92 
93 		if (pstate->comment_handling == COMMENTS_ARE_DATA) {
94 			if (pstate->ifslen == 1)
95 				line = mlr_alloc_read_line_single_delimiter(input_stream, pstate->ifs[0],
96 					&pstate->line_length, pstate->do_auto_line_term, pctx);
97 			else
98 				line = mlr_alloc_read_line_multiple_delimiter(input_stream, pstate->ifs, pstate->ifslen,
99 					&pstate->line_length);
100 		} else {
101 			if (pstate->ifslen == 1)
102 				line = mlr_alloc_read_line_single_delimiter_stripping_comments(input_stream, pstate->ifs[0],
103 					&pstate->line_length, pstate->do_auto_line_term,
104 					pstate->comment_handling, pstate->comment_string, pctx);
105 			else
106 				line = mlr_alloc_read_line_multiple_delimiter_stripping_comments(input_stream,
107 					pstate->ifs, pstate->ifslen, &pstate->line_length,
108 					pstate->comment_handling, pstate->comment_string);
109 		}
110 
111 		if (line == NULL) { // EOF
112 			// EOF or blank line terminates the stanza.
113 			pstate->at_eof = TRUE;
114 			if (pxtab_lines->length == 0) {
115 				slls_free(pxtab_lines);
116 				return NULL;
117 			} else {
118 				return (pstate->ipslen == 1)
119 					? lrec_parse_stdio_xtab_single_ips(pxtab_lines, pstate->ips[0], pstate->allow_repeat_ips)
120 					: lrec_parse_stdio_xtab_multi_ips(pxtab_lines, pstate->ips, pstate->ipslen,
121 						pstate->allow_repeat_ips);
122 			}
123 
124 		} else if (*line == '\0') {
125 			free(line);
126 			if (pxtab_lines->length > 0) {
127 				return (pstate->ipslen == 1)
128 					? lrec_parse_stdio_xtab_single_ips(pxtab_lines, pstate->ips[0], pstate->allow_repeat_ips)
129 					: lrec_parse_stdio_xtab_multi_ips(pxtab_lines, pstate->ips, pstate->ipslen,
130 						pstate->allow_repeat_ips);
131 			}
132 
133 		} else {
134 			slls_append_with_free(pxtab_lines, line);
135 		}
136 	}
137 }
138 
139 // ----------------------------------------------------------------
lrec_parse_stdio_xtab_single_ips(slls_t * pxtab_lines,char ips,int allow_repeat_ips)140 lrec_t* lrec_parse_stdio_xtab_single_ips(slls_t* pxtab_lines, char ips, int allow_repeat_ips) {
141 	lrec_t* prec = lrec_xtab_alloc(pxtab_lines);
142 
143 	for (sllse_t* pe = pxtab_lines->phead; pe != NULL; pe = pe->pnext) {
144 		char* line = pe->value;
145 		char* p = line;
146 		char* key = p;
147 
148 		while (*p != 0 && *p != ips)
149 			p++;
150 		if (*p == 0) {
151 			lrec_put(prec, key, "", NO_FREE);
152 		} else {
153 			while (*p != 0 && *p == ips) {
154 				*p = 0;
155 				p++;
156 			}
157 			lrec_put(prec, key, p, NO_FREE);
158 		}
159 	}
160 
161 	return prec;
162 }
163 
lrec_parse_stdio_xtab_multi_ips(slls_t * pxtab_lines,char * ips,int ipslen,int allow_repeat_ips)164 lrec_t* lrec_parse_stdio_xtab_multi_ips(slls_t* pxtab_lines, char* ips, int ipslen, int allow_repeat_ips) {
165 	lrec_t* prec = lrec_xtab_alloc(pxtab_lines);
166 
167 	for (sllse_t* pe = pxtab_lines->phead; pe != NULL; pe = pe->pnext) {
168 		char* line = pe->value;
169 		char* p = line;
170 		char* key = p;
171 
172 		while (*p != 0 && !streqn(p, ips, ipslen))
173 			p++; // Advance by only 1 in case of subsequent match
174 		if (*p == 0) {
175 			lrec_put(prec, key, "", NO_FREE);
176 		} else {
177 			while (*p != 0 && streqn(p, ips, ipslen)) {
178 				*p = 0;
179 				p += ipslen;
180 			}
181 			lrec_put(prec, key, p, NO_FREE);
182 		}
183 	}
184 
185 	return prec;
186 }
187