1 /* Git-style configuration file parser for Grecs.
2 Copyright (C) 2011-2016 Sergey Poznyakoff
3
4 Grecs is free software; you can redistribute it and/or modify it
5 under the terms of the GNU General Public License as published by the
6 Free Software Foundation; either version 3 of the License, or (at your
7 option) any later version.
8
9 Grecs is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License along
15 with Grecs. If not, see <http://www.gnu.org/licenses/>. */
16
17 #ifdef HAVE_CONFIG_H
18 # include <config.h>
19 #endif
20 #include <stdlib.h>
21 #include <string.h>
22 #include <ctype.h>
23 #include <errno.h>
24 #include <grecs.h>
25
/* Lexer state shared by the routines in this file. */

/* Stream being parsed; the reader treats a null pointer as end of input. */
static FILE *infile;

/* Character most recently obtained by the reader; 0 means end of input. */
static int input_char;

/* Text accumulator in which token text is collected. */
static struct grecs_txtacc *acc;
29
/*
 * Token types.  A token consisting of a single punctuation character is
 * represented by the character code itself, which is why the named
 * multi-character tokens start above the 8-bit range.
 */
enum {
	TOK_ERR = -1,		/* malformed input */
	TOK_EOF = 0,		/* end of input */
	TOK_EQ = '=',		/* the `=' sign */
	TOK_SECTION = 256,	/* section header */
	TOK_KEYWORD = 257,	/* identifier */
	TOK_VALUE = 258		/* value of a statement */
};
36
37 struct token {
38 int type;
39 char *buf;
40 char chbuf[2];
41 int putback;
42 struct grecs_list *path;
43 grecs_locus_t loc;
44 unsigned prev_col;
45 } tok;
46
/*
 * Character classification helpers.  Each macro may evaluate its
 * argument more than once, so pass only side-effect-free expressions.
 *
 * ISSPACE explicitly rejects 0: strchr() treats the terminating null
 * byte as part of the string, so without the guard the end-of-input
 * marker (0) would be classified as whitespace and an unguarded
 * "skip whitespace" loop would never terminate at EOF.
 */
#define ISSPACE(c) ((c) != 0 && strchr(" \t\r\f\n", (c)) != NULL)
/* True for characters allowed inside an identifier. */
#define ISIDENT(c) ((isascii(c) && isalnum(c)) || (c) == '_')
/* True for characters allowed as the first character of an identifier. */
#define ISINITIAL(c) ((isascii(c) && isalpha(c)) || (c) == '_')
50
51 static int
rawinput()52 rawinput()
53 {
54 if (!infile || feof(infile))
55 return input_char = 0;
56 input_char = fgetc(infile);
57 if (input_char == '\n') {
58 tok.prev_col = grecs_current_locus_point.col;
59 grecs_locus_point_advance_line(grecs_current_locus_point);
60 } else if (input_char < 0)
61 input_char = 0;
62 else
63 grecs_current_locus_point.col++;
64 return input_char;
65 }
66
67 static int
input()68 input()
69 {
70 rawinput();
71 if (input_char == '#' || input_char == ';') {
72 while (rawinput() && input_char != '\n')
73 ;
74 }
75 return input_char;
76 }
77
78 static void
unput()79 unput()
80 {
81 if (!input_char)
82 return;
83 if (input_char == '\n') {
84 grecs_current_locus_point.line--;
85 grecs_current_locus_point.col = tok.prev_col;
86 } else
87 grecs_current_locus_point.col--;
88
89 ungetc(input_char, infile);
90 }
91
/*
 * Resynchronize after a syntax error: discard input up to and including
 * the next newline, or up to the end of input.
 */
static void
error_recovery()
{
	for (;;) {
		int c = input();

		if (c == 0 || c == '\n')
			break;
	}
}
98
99 static void
collect_unquoted()100 collect_unquoted()
101 {
102 do
103 grecs_txtacc_grow_char(acc, input_char);
104 while (input() &&
105 !(ISSPACE(input_char) || input_char == ']'));
106
107 }
108
109 static void
collect_subsection_name()110 collect_subsection_name()
111 {
112 do
113 grecs_txtacc_grow_char(acc, input_char);
114 while (input() &&
115 (isalnum(input_char) || input_char == '_' ||
116 input_char == '-'));
117 }
118
119 static void
collect_substring()120 collect_substring()
121 {
122 while (rawinput()) {
123 if (input_char == '\\') {
124 if (!input()) {
125 grecs_error(&tok.loc, 0,
126 "unexpected EOF in string");
127 break;
128 }
129 switch (input_char) {
130 case 'n':
131 input_char = '\n';
132 break;
133 case 't':
134 input_char = '\t';
135 break;
136 case 'b':
137 input_char = '\b';
138 }
139 } else if (input_char == '"')
140 break;
141 grecs_txtacc_grow_char(acc, input_char);
142 }
143 }
144
/*
 * Set the end point of token T's location to the current locus point.
 * If ADJ is non-zero, the column is corrected for the look-ahead
 * character: when that character is a newline the column saved before
 * the newline is used, otherwise ADJ is subtracted from the column.
 * Both arguments may be evaluated more than once.
 */
#define endpoint(t,adj) do {						\
		(t).loc.end = grecs_current_locus_point;		\
		if ((adj) && input_char == '\n')			\
			(t).loc.end.col = (t).prev_col;			\
		else if (adj)						\
			(t).loc.end.col -= (adj);			\
	} while (0)
154
155 static void
gettoken(void)156 gettoken(void)
157 {
158 int putback = tok.putback;
159 tok.putback = 0;
160 if (putback) {
161 if (putback == '\n')
162 grecs_locus_point_advance_line(grecs_current_locus_point);
163 else
164 grecs_current_locus_point.col++;
165 return;
166 }
167
168 tok.buf = NULL;
169 /* Skip whitespace */
170 while (input() && ISSPACE(input_char))
171 ;
172
173 tok.loc.beg = grecs_current_locus_point;
174
175 if (input_char <= 0) {
176 tok.type = TOK_EOF;
177 endpoint(tok, 0);
178 return;
179 }
180
181 if (input_char == '[') {
182 int dot_delimited = -1;
183
184 tok.type = TOK_SECTION;
185 grecs_list_clear(tok.path);
186 input();
187 for (;;) {
188 char *p;
189
190 if (!dot_delimited)
191 while (ISSPACE(input_char))
192 input();
193 else {
194 if (input_char == ']')
195 break;
196 if (dot_delimited == 1)
197 input();
198 }
199
200 if (input_char == TOK_EOF) {
201 endpoint(tok, 0);
202 grecs_error(&tok.loc, 0,
203 "unexpected EOF in section header");
204 tok.type = TOK_ERR;
205 return;
206 }
207 if (input_char == ']')
208 break;
209 if (input_char == '\n') {
210 endpoint(tok, 1);
211 grecs_error(&tok.loc, 0,
212 "unexpect newline in in section header");
213 tok.type = TOK_ERR;
214 return;
215 }
216
217 if (dot_delimited != 1 && input_char == '"') {
218 collect_substring();
219 input();
220 dot_delimited = 0;
221 } else if (dot_delimited == 1)
222 collect_subsection_name();
223 else
224 collect_unquoted();
225 if (dot_delimited == -1)
226 dot_delimited = input_char == '.';
227 else if (dot_delimited == 1) {
228 if (input_char != '.' && input_char != ']') {
229 endpoint(tok, 1);
230 grecs_error(&tok.loc, 0,
231 "unexpected character in section header");
232 tok.type = TOK_ERR;
233 return;
234 }
235 }
236 grecs_txtacc_grow_char(acc, 0);
237 p = grecs_txtacc_finish(acc, 0);
238 grecs_list_append(tok.path, p);
239 }
240
241 endpoint(tok, 1);
242 if (grecs_list_size(tok.path) == 0) {
243 grecs_error(&tok.loc, 0, "empty section header");
244 tok.type = TOK_ERR;
245 return;
246 }
247
248 tok.type = TOK_SECTION;
249 return;
250 }
251
252 if (ISINITIAL(input_char)) {
253 tok.type = TOK_KEYWORD;
254 do
255 grecs_txtacc_grow_char(acc, input_char);
256 while (input() && ISIDENT(input_char));
257 unput();
258 grecs_txtacc_grow_char(acc, 0);
259 tok.buf = grecs_txtacc_finish(acc, 0);
260 endpoint(tok, 0);
261 return;
262 }
263
264 tok.chbuf[0] = input_char;
265 tok.chbuf[1] = 0;
266 tok.buf = tok.chbuf;
267 tok.type = input_char;
268 endpoint(tok, 0);
269 }
270
271 static void
collect_value()272 collect_value()
273 {
274 do {
275 if (input_char == '"') {
276 collect_substring();
277 if (input_char == '"')
278 continue;
279 else
280 break;
281 }
282 if (input_char == '\\') {
283 if (!rawinput())
284 break;
285 switch (input_char) {
286 case 'n':
287 input_char = '\n';
288 break;
289 case 't':
290 input_char = '\t';
291 break;
292 case 'b':
293 input_char = '\b';
294 }
295 }
296 grecs_txtacc_grow_char(acc, input_char);
297 } while (input() && input_char != '\n');
298 }
299
300 static struct grecs_value *
getvalue()301 getvalue()
302 {
303 int len;
304 struct grecs_value *val = grecs_malloc(sizeof(*val));
305
306 while (input() && ISSPACE(input_char) && input_char != '\n')
307 ;
308
309 val->locus.beg = grecs_current_locus_point;
310
311 if (input_char != '\n')
312 collect_value();
313 val->locus.end = grecs_current_locus_point;
314 val->locus.end.line--;
315 val->locus.end.col = tok.prev_col;
316
317 grecs_txtacc_grow_char(acc, 0);
318 tok.type = TOK_VALUE;
319 tok.buf = grecs_txtacc_finish(acc, 1);
320 len = strlen(tok.buf);
321 while (len > 0 && ISSPACE(tok.buf[len-1]))
322 tok.buf[--len] = 0;
323 val->type = GRECS_TYPE_STRING;
324 val->v.string = tok.buf;
325 return val;
326 }
327
328 static int
read_statement(struct grecs_node * parent)329 read_statement(struct grecs_node *parent)
330 {
331 struct grecs_node *node;
332
333 gettoken();
334 if (tok.type == TOK_EOF || tok.type == TOK_SECTION) {
335 tok.putback = 1;
336 return 0;
337 }
338 if (tok.type != TOK_KEYWORD) {
339 grecs_error(&tok.loc, 0, "syntax error");
340 error_recovery();
341 return 1;
342 }
343
344 node = grecs_node_create(grecs_node_stmt, &tok.loc);
345 node->ident = grecs_strdup(tok.buf);
346 node->idloc = tok.loc;
347
348 gettoken();
349 if (tok.type == TOK_EOF) {
350 grecs_error(&tok.loc, 0, "unexpected EOF");
351 grecs_node_free(node);
352 return 0;
353 }
354 if (tok.type != TOK_EQ) {
355 grecs_error(&tok.loc, 0,
356 "expected `=', but found `%s'", tok.buf);
357 error_recovery();
358 grecs_node_free(node);
359 return 1;
360 }
361 node->v.value = getvalue();
362 node->locus.end = node->v.value->locus.end;
363 grecs_node_bind(parent, node, 1);
364 return 1;
365 }
366
/*
 * Parse consecutive statements, binding each of them to PARENT, until
 * read_statement() reports that no more statements follow.
 */
static void
read_statement_list(struct grecs_node *parent)
{
	int more;

	do
		more = read_statement(parent);
	while (more);
}
373
374 struct grecs_node *
create_subsection_node(struct grecs_node * root)375 create_subsection_node(struct grecs_node *root)
376 {
377 struct grecs_list_entry *ep;
378 struct grecs_node *p;
379
380 for (ep = tok.path->head; ep; ep = ep->next) {
381 char *ident = ep->data;
382 p = grecs_find_node(root, ident);
383 if (!p) {
384 p = grecs_node_create(grecs_node_block, &tok.loc);
385 p->ident = grecs_strdup(ident);
386 grecs_node_bind(root, p, 1);
387 }
388 root = p;
389 }
390 return root;
391 }
392
393 static int
read_section(struct grecs_node * parent)394 read_section(struct grecs_node *parent)
395 {
396 gettoken();
397 if (tok.type == TOK_EOF)
398 return 0;
399 else if (tok.type == TOK_SECTION) {
400 struct grecs_node *node = create_subsection_node(parent);
401 read_statement_list(node);
402 } else if (tok.type == TOK_KEYWORD) {
403 read_statement(parent);
404 } else {
405 grecs_error(&tok.loc, 0, "syntax error");
406 error_recovery();
407 }
408 return 1;
409 }
410
411 /* FIXME: traceflags not used */
412 struct grecs_node *
grecs_git_parser(const char * name,int traceflags)413 grecs_git_parser(const char *name, int traceflags)
414 {
415 struct grecs_node *root;
416
417 infile = fopen(name, "r");
418 if (!infile) {
419 grecs_error(NULL, errno, _("cannot open `%s'"), name);
420 return NULL;
421 }
422 grecs_current_locus_point.file = grecs_install_text(name);
423 grecs_current_locus_point.line = 1;
424 grecs_current_locus_point.col = 0;
425
426 acc = grecs_txtacc_create();
427 tok.path = grecs_list_create();
428 root = grecs_node_create(grecs_node_root, &tok.loc);
429
430 while (read_section(root))
431 ;
432 root->locus.end = grecs_current_locus_point;
433 fclose(infile);
434 grecs_txtacc_free(acc);
435 grecs_list_free(tok.path);
436 if (grecs_error_count) {
437 grecs_tree_free(root);
438 root = NULL;
439 }
440 return root;
441 }
442